// Extracted from github.com/bir3/gocompiler@v0.9.2202/src/cmd/link/internal/ld/deadcode.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ld

import (
	"github.com/bir3/gocompiler/src/cmd/internal/goobj"
	"github.com/bir3/gocompiler/src/cmd/internal/objabi"
	"github.com/bir3/gocompiler/src/cmd/internal/sys"
	"github.com/bir3/gocompiler/src/cmd/link/internal/loader"
	"github.com/bir3/gocompiler/src/cmd/link/internal/sym"
	"fmt"
	"github.com/bir3/gocompiler/src/internal/buildcfg"
	"strings"
	"unicode"
)

var _ = fmt.Print

// deadcodePass holds the state of one run of the dead code pass: the
// work queue of symbols still to visit, plus everything learned so far
// about interface methods, reflection and package init functions.
type deadcodePass struct {
	ctxt *Link
	ldr  *loader.Loader
	wq   heap // work queue, using min-heap for better locality

	ifaceMethod        map[methodsig]bool // methods called from reached interface call sites
	genericIfaceMethod map[string]bool    // names of methods called from reached generic interface call sites
	markableMethods    []methodref        // methods of reached types
	reflectSeen        bool               // whether we have seen a reflect method call
	dynlink            bool               // cached result of ctxt.DynlinkingGo(), set in init

	methodsigstmp []methodsig  // scratch buffer for decoding method signatures
	pkginits      []loader.Sym // package init functions recorded by flood, consumed by mapinitcleanup
	mapinitnoop   loader.Sym   // the runtime.mapinitnoop symbol, looked up in init
}

// init resets the reachability state and seeds the work queue with the
// root symbols.
//
// When building a shared library every defined symbol (plus
// mainInittasks) is a root and init returns early. Otherwise the roots
// are the entry symbol(s), runtime.unreachableMethod, the plugin symbols
// when building a plugin, all dynamic exports, and mainInittasks.
// It panics if runtime.mapinitnoop cannot be looked up.
func (d *deadcodePass) init() {
	d.ldr.InitReachable()
	d.ifaceMethod = make(map[methodsig]bool)
	d.genericIfaceMethod = make(map[string]bool)
	if buildcfg.Experiment.FieldTrack {
		d.ldr.Reachparent = make([]loader.Sym, d.ldr.NSym())
	}
	d.dynlink = d.ctxt.DynlinkingGo()

	if d.ctxt.BuildMode == BuildModeShared {
		// Mark all symbols defined in this library as reachable when
		// building a shared library.
		n := d.ldr.NDef()
		for i := 1; i < n; i++ {
			s := loader.Sym(i)
			d.mark(s, 0)
		}
		d.mark(d.ctxt.mainInittasks, 0)
		return
	}

	var names []string

	// In a normal binary, start at main.main and the init
	// functions and mark what is reachable from there.
	if d.ctxt.linkShared && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
		names = append(names, "main.main", "main..inittask")
	} else {
		// The external linker refers to the main symbol directly.
		// Note that this overwrites *flagEntrySymbol.
		if d.ctxt.LinkMode == LinkExternal && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
			if d.ctxt.HeadType == objabi.Hwindows && d.ctxt.Arch.Family == sys.I386 {
				*flagEntrySymbol = "_main"
			} else {
				*flagEntrySymbol = "main"
			}
		}
		names = append(names, *flagEntrySymbol)
	}
	// runtime.unreachableMethod is a function that will throw if called.
	// We redirect unreachable methods to it.
	names = append(names, "runtime.unreachableMethod")
	if d.ctxt.BuildMode == BuildModePlugin {
		names = append(names, objabi.PathToPrefix(*flagPluginPath)+"..inittask", objabi.PathToPrefix(*flagPluginPath)+".main", "go:plugin.tabs")

		// We don't keep the go:plugin.exports symbol,
		// but we do keep the symbols it refers to.
		exportsIdx := d.ldr.Lookup("go:plugin.exports", 0)
		if exportsIdx != 0 {
			relocs := d.ldr.Relocs(exportsIdx)
			for i := 0; i < relocs.Count(); i++ {
				d.mark(relocs.At(i).Sym(), 0)
			}
		}
	}

	if d.ctxt.Debugvlog > 1 {
		d.ctxt.Logf("deadcode start names: %v\n", names)
	}

	// A failed Lookup yields 0, which mark ignores, so names that do not
	// exist under a given version are skipped silently.
	for _, name := range names {
		// Mark symbol as a data/ABI0 symbol.
		d.mark(d.ldr.Lookup(name, 0), 0)
		if abiInternalVer != 0 {
			// Also mark any Go functions (internal ABI).
			d.mark(d.ldr.Lookup(name, abiInternalVer), 0)
		}
	}

	// All dynamic exports are roots.
	for _, s := range d.ctxt.dynexp {
		if d.ctxt.Debugvlog > 1 {
			d.ctxt.Logf("deadcode start dynexp: %s<%d>\n", d.ldr.SymName(s), d.ldr.SymVersion(s))
		}
		d.mark(s, 0)
	}

	d.mapinitnoop = d.ldr.Lookup("runtime.mapinitnoop", abiInternalVer)
	if d.mapinitnoop == 0 {
		panic("could not look up runtime.mapinitnoop")
	}
	if d.ctxt.mainInittasks != 0 {
		d.mark(d.ctxt.mainInittasks, 0)
	}
}

// flood drains the work queue. For every popped symbol it walks the
// symbol's relocations and aux symbols and marks what they reference,
// which may push more work onto the queue.
//
// Along the way it records: whether a reflect-method function was seen
// (reflectSeen), interface methods reached through marker relocations
// (ifaceMethod, genericIfaceMethod), package init functions (pkginits),
// and the methods of types that are used in interfaces
// (markableMethods). Those methods are not marked here; the caller
// decides later which of them to keep.
func (d *deadcodePass) flood() {
	// methods is reused across iterations to collect the method
	// relocations of the symbol currently being visited.
	var methods []methodref
	for !d.wq.empty() {
		symIdx := d.wq.pop()

		// Methods may be called via reflection. Give up on static analysis,
		// and mark all exported methods of all reachable types as reachable.
		d.reflectSeen = d.reflectSeen || d.ldr.IsReflectMethod(symIdx)

		isgotype := d.ldr.IsGoType(symIdx)
		relocs := d.ldr.Relocs(symIdx)
		var usedInIface bool

		if isgotype {
			if d.dynlink {
				// When dynamic linking, a type may be passed across DSO
				// boundary and get converted to interface at the other side.
				d.ldr.SetAttrUsedInIface(symIdx, true)
			}
			usedInIface = d.ldr.AttrUsedInIface(symIdx)
		}

		methods = methods[:0]
		for i := 0; i < relocs.Count(); i++ {
			r := relocs.At(i)
			if r.Weak() {
				convertWeakToStrong := false
				// When building with "-linkshared", we can't tell if the
				// interface method in itab will be used or not.
				// Ignore the weak attribute.
				if d.ctxt.linkShared && d.ldr.IsItab(symIdx) {
					convertWeakToStrong = true
				}
				// If the program uses plugins, we can no longer treat
				// relocs from pkg init functions to outlined map init
				// fragments as weak, since doing so can cause package
				// init clashes between the main program and the
				// plugin. See #62430 for more details.
				if d.ctxt.canUsePlugins && r.Type().IsDirectCall() {
					convertWeakToStrong = true
				}
				if !convertWeakToStrong {
					// skip this reloc
					continue
				}
			}
			t := r.Type()
			switch t {
			case objabi.R_METHODOFF:
				if i+2 >= relocs.Count() {
					panic("expect three consecutive R_METHODOFF relocs")
				}
				if usedInIface {
					methods = append(methods, methodref{src: symIdx, r: i})
					// The method descriptor is itself a type descriptor, and
					// it can be used to reach other types, e.g. by using
					// reflect.Type.Method(i).Type.In(j). We need to traverse
					// its child types with UsedInIface set. (See also the
					// comment below.)
					rs := r.Sym()
					if !d.ldr.AttrUsedInIface(rs) {
						d.ldr.SetAttrUsedInIface(rs, true)
						if d.ldr.AttrReachable(rs) {
							d.ldr.SetAttrReachable(rs, false)
							d.mark(rs, symIdx)
						}
					}
				}
				// Skip the other two relocations of this method's triple.
				i += 2
				continue
			case objabi.R_USETYPE:
				// type symbol used for DWARF. we need to load the symbol but it may not
				// be otherwise reachable in the program.
				// do nothing for now as we still load all type symbols.
				continue
			case objabi.R_USEIFACE:
				// R_USEIFACE is a marker relocation that tells the linker the type is
				// converted to an interface, i.e. should have UsedInIface set. See the
				// comment below for why we need to unset the Reachable bit and re-mark it.
				rs := r.Sym()
				if d.ldr.IsItab(rs) {
					// This relocation can also point at an itab, in which case it
					// means "the _type field of that itab".
					rs = decodeItabType(d.ldr, d.ctxt.Arch, rs)
				}
				if !d.ldr.IsGoType(rs) && !d.ctxt.linkShared {
					panic(fmt.Sprintf("R_USEIFACE in %s references %s which is not a type or itab", d.ldr.SymName(symIdx), d.ldr.SymName(rs)))
				}
				if !d.ldr.AttrUsedInIface(rs) {
					d.ldr.SetAttrUsedInIface(rs, true)
					if d.ldr.AttrReachable(rs) {
						d.ldr.SetAttrReachable(rs, false)
						d.mark(rs, symIdx)
					}
				}
				continue
			case objabi.R_USEIFACEMETHOD:
				// R_USEIFACEMETHOD is a marker relocation that marks an interface
				// method as used.
				rs := r.Sym()
				if d.ctxt.linkShared && (d.ldr.SymType(rs) == sym.SDYNIMPORT || d.ldr.SymType(rs) == sym.Sxxx) {
					// Don't decode symbol from shared library (we'll mark all exported methods anyway).
					// We check for both SDYNIMPORT and Sxxx because name-mangled symbols haven't
					// been resolved at this point.
					continue
				}
				m := d.decodeIfaceMethod(d.ldr, d.ctxt.Arch, rs, r.Add())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached iface method: %v\n", m)
				}
				d.ifaceMethod[m] = true
				continue
			case objabi.R_USENAMEDMETHOD:
				name := d.decodeGenericIfaceMethod(d.ldr, r.Sym())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached generic iface method: %s\n", name)
				}
				d.genericIfaceMethod[name] = true
				continue // don't mark referenced symbol - it is not needed in the final binary.
			case objabi.R_INITORDER:
				// inittasks has already run, so any R_INITORDER links are now
				// superfluous - the only live inittask records are those which are
				// in a scheduled list somewhere (e.g. runtime.moduledata.inittasks).
				continue
			}
			rs := r.Sym()
			if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) {
				// If a type is converted to an interface, it is possible to obtain an
				// interface with a "child" type of it using reflection (e.g. obtain an
				// interface of T from []chan T). We need to traverse its "child" types
				// with UsedInIface attribute set.
				// When visiting the child type (chan T in the example above), it will
				// have UsedInIface set, so it in turn will mark and (re)visit its children
				// (e.g. T above).
				// We unset the reachable bit here, so if the child type is already visited,
				// it will be visited again.
				// Note that a type symbol can be visited at most twice, one without
				// UsedInIface and one with. So termination is still guaranteed.
				d.ldr.SetAttrUsedInIface(rs, true)
				d.ldr.SetAttrReachable(rs, false)
			}
			d.mark(rs, symIdx)
		}
		naux := d.ldr.NAux(symIdx)
		for i := 0; i < naux; i++ {
			a := d.ldr.Aux(symIdx, i)
			if a.Type() == goobj.AuxGotype {
				// A symbol being reachable doesn't imply we need its
				// type descriptor. Don't mark it.
				continue
			}
			d.mark(a.Sym(), symIdx)
		}
		// Record sym if package init func (here naux != 0 is a cheap way
		// to check first if it is a function symbol).
		if naux != 0 && d.ldr.IsPkgInit(symIdx) {

			d.pkginits = append(d.pkginits, symIdx)
		}
		// Some host object symbols have an outer object, which acts like a
		// "carrier" symbol, or it holds all the symbols for a particular
		// section. We need to mark all "referenced" symbols from that carrier,
		// so we make sure we're pulling in all outer symbols, and their sub
		// symbols. This is not ideal, and these carrier/section symbols could
		// be removed.
		if d.ldr.IsExternal(symIdx) {
			d.mark(d.ldr.OuterSym(symIdx), symIdx)
			d.mark(d.ldr.SubSym(symIdx), symIdx)
		}

		if len(methods) != 0 {
			if !isgotype {
				panic("method found on non-type symbol")
			}
			// Decode runtime type information for type methods
			// to help work out which methods can be called
			// dynamically via interfaces.
			methodsigs := d.decodetypeMethods(d.ldr, d.ctxt.Arch, symIdx, &relocs)
			if len(methods) != len(methodsigs) {
				panic(fmt.Sprintf("%q has %d method relocations for %d methods", d.ldr.SymName(symIdx), len(methods), len(methodsigs)))
			}
			// The i-th collected relocation triple is paired with the
			// i-th decoded signature.
			for i, m := range methodsigs {
				methods[i].m = m
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("markable method: %v of sym %v %s\n", m, symIdx, d.ldr.SymName(symIdx))
				}
			}
			d.markableMethods = append(d.markableMethods, methods...)
		}
	}
}

// mapinitcleanup walks all pkg init functions and looks for weak relocations
// to mapinit symbols that are no longer reachable. It rewrites
// the relocs to target a new no-op routine in the runtime.
//
// It panics if such a relocation targets a symbol whose name does not
// contain "map.init".
func (d *deadcodePass) mapinitcleanup() {
	for _, idx := range d.pkginits {
		relocs := d.ldr.Relocs(idx)
		// su is created lazily, only for init functions that actually
		// need a relocation rewritten.
		var su *loader.SymbolBuilder
		for i := 0; i < relocs.Count(); i++ {
			r := relocs.At(i)
			rs := r.Sym()
			if r.Weak() && r.Type().IsDirectCall() && !d.ldr.AttrReachable(rs) {
				// double check to make sure target is indeed map.init
				rsn := d.ldr.SymName(rs)
				if !strings.Contains(rsn, "map.init") {
					panic(fmt.Sprintf("internal error: expected map.init sym for weak call reloc, got %s -> %s", d.ldr.SymName(idx), rsn))
				}
				d.ldr.SetAttrReachable(d.mapinitnoop, true)
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("deadcode: %s rewrite %s ref to %s\n",
						d.ldr.SymName(idx), rsn,
						d.ldr.SymName(d.mapinitnoop))
				}
				if su == nil {
					su = d.ldr.MakeSymbolUpdater(idx)
				}
				su.SetRelocSym(i, d.mapinitnoop)
			}
		}
	}
}

// mark flags symIdx as reachable and queues it for flood, unless symIdx
// is 0 or is already reachable.
//
// parent is the symbol through which symIdx was reached (0 for roots).
// It is stored in Reachparent when the FieldTrack experiment is on, and
// printed as a "from -> to" edge when *flagDumpDep is set.
func (d *deadcodePass) mark(symIdx, parent loader.Sym) {
	if symIdx != 0 && !d.ldr.AttrReachable(symIdx) {
		d.wq.push(symIdx)
		d.ldr.SetAttrReachable(symIdx, true)
		// Only the first recorded parent is kept, even if the symbol is
		// un-marked and marked again later.
		if buildcfg.Experiment.FieldTrack && d.ldr.Reachparent[symIdx] == 0 {
			d.ldr.Reachparent[symIdx] = parent
		}
		if *flagDumpDep {
			to := d.ldr.SymName(symIdx)
			if to != "" {
				to = d.dumpDepAddFlags(to, symIdx)
				// Roots have no parent and are printed with "_" as source.
				from := "_"
				if parent != 0 {
					from = d.ldr.SymName(parent)
					from = d.dumpDepAddFlags(from, parent)
				}
				fmt.Printf("%s -> %s\n", from, to)
			}
		}
	}
}

// dumpDepAddFlags returns name decorated for the dependency dump: if
// symIdx has the UsedInIface attribute and/or is a reflect-method
// function, the tags "<UsedInIface>" and "<ReflectMethod>" are appended
// after a single space. Otherwise name is returned unchanged.
func (d *deadcodePass) dumpDepAddFlags(name string, symIdx loader.Sym) string {
	var flags strings.Builder
	if d.ldr.AttrUsedInIface(symIdx) {
		flags.WriteString("<UsedInIface>")
	}
	if d.ldr.IsReflectMethod(symIdx) {
		flags.WriteString("<ReflectMethod>")
	}
	if flags.Len() > 0 {
		return name + " " + flags.String()
	}
	return name
}

// markMethod marks the targets of the three consecutive relocations that
// start at index m.r on the receiver type symbol m.src.
func (d *deadcodePass) markMethod(m methodref) {
	relocs := d.ldr.Relocs(m.src)
	d.mark(relocs.At(m.r).Sym(), m.src)
	d.mark(relocs.At(m.r+1).Sym(), m.src)
	d.mark(relocs.At(m.r+2).Sym(), m.src)
}

// deadcode marks all reachable symbols.
//
// The basis of the dead code elimination is a flood fill of symbols,
// following their relocations, beginning at *flagEntrySymbol.
//
// This flood fill is wrapped in logic for pruning unused methods.
// All methods are mentioned by relocations on their receiver's *rtype.
// These relocations are specially defined as R_METHODOFF by the compiler
// so we can detect and manipulate them here.
//
// There are three ways a method of a reachable type can be invoked:
//
//  1. direct call
//  2. through a reachable interface type
//  3. reflect.Value.Method (or MethodByName), or reflect.Type.Method
//     (or MethodByName)
//
// The first case is handled by the flood fill, a directly called method
// is marked as reachable.
//
// The second case is handled by decomposing all reachable interface
// types into method signatures. Each encountered method is compared
// against the interface method signatures, if it matches it is marked
// as reachable. This is extremely conservative, but easy and correct.
//
// The third case is handled by looking for functions that compiler flagged
// as REFLECTMETHOD. REFLECTMETHOD on a function F means that F does a method
// lookup with reflection, but the compiler was not able to statically determine
// the method name.
//
// All functions that call reflect.Value.Method or reflect.Type.Method are REFLECTMETHODs.
// Functions that call reflect.Value.MethodByName or reflect.Type.MethodByName with
// a non-constant argument are REFLECTMETHODs, too. If we find a REFLECTMETHOD,
// we give up on static analysis, and mark all exported methods of all reachable
// types as reachable.
//
// If the argument to MethodByName is a compile-time constant, the compiler
// emits a relocation with the method name. Matching methods are kept in all
// reachable types.
//
// Any unreached text symbols are removed from ctxt.Textp.
func deadcode(ctxt *Link) {
	ldr := ctxt.loader
	d := deadcodePass{ctxt: ctxt, ldr: ldr}
	d.init()
	d.flood()

	if ctxt.DynlinkingGo() {
		// Exported methods may satisfy interfaces we don't know
		// about yet when dynamically linking.
		d.reflectSeen = true
	}

	for {
		// Mark all methods that could satisfy a discovered
		// interface as reachable. We recheck old marked interfaces
		// as new types (with new methods) may have been discovered
		// in the last pass.
		//
		// rem reuses the backing array of markableMethods to keep the
		// methods that are still undecided after this pass.
		rem := d.markableMethods[:0]
		for _, m := range d.markableMethods {
			if (d.reflectSeen && (m.isExported() || d.dynlink)) || d.ifaceMethod[m.m] || d.genericIfaceMethod[m.m.name] {
				d.markMethod(m)
			} else {
				rem = append(rem, m)
			}
		}
		d.markableMethods = rem

		if d.wq.empty() {
			// No new work was discovered. Done.
			break
		}
		d.flood()
	}
	if *flagPruneWeakMap {
		d.mapinitcleanup()
	}
}

// methodsig is a typed method signature (name + type).
468 type methodsig struct { 469 name string 470 typ loader.Sym // type descriptor symbol of the function 471 } 472 473 // methodref holds the relocations from a receiver type symbol to its 474 // method. There are three relocations, one for each of the fields in 475 // the reflect.method struct: mtyp, ifn, and tfn. 476 type methodref struct { 477 m methodsig 478 src loader.Sym // receiver type symbol 479 r int // the index of R_METHODOFF relocations 480 } 481 482 func (m methodref) isExported() bool { 483 for _, r := range m.m.name { 484 return unicode.IsUpper(r) 485 } 486 panic("methodref has no signature") 487 } 488 489 // decodeMethodSig decodes an array of method signature information. 490 // Each element of the array is size bytes. The first 4 bytes is a 491 // nameOff for the method name, and the next 4 bytes is a typeOff for 492 // the function type. 493 // 494 // Conveniently this is the layout of both runtime.method and runtime.imethod. 495 func (d *deadcodePass) decodeMethodSig(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs, off, size, count int) []methodsig { 496 if cap(d.methodsigstmp) < count { 497 d.methodsigstmp = append(d.methodsigstmp[:0], make([]methodsig, count)...) 498 } 499 var methods = d.methodsigstmp[:count] 500 for i := 0; i < count; i++ { 501 methods[i].name = decodetypeName(ldr, symIdx, relocs, off) 502 methods[i].typ = decodeRelocSym(ldr, symIdx, relocs, int32(off+4)) 503 off += size 504 } 505 return methods 506 } 507 508 // Decode the method of interface type symbol symIdx at offset off. 
509 func (d *deadcodePass) decodeIfaceMethod(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, off int64) methodsig { 510 p := ldr.Data(symIdx) 511 if p == nil { 512 panic(fmt.Sprintf("missing symbol %q", ldr.SymName(symIdx))) 513 } 514 if decodetypeKind(arch, p)&kindMask != kindInterface { 515 panic(fmt.Sprintf("symbol %q is not an interface", ldr.SymName(symIdx))) 516 } 517 relocs := ldr.Relocs(symIdx) 518 var m methodsig 519 m.name = decodetypeName(ldr, symIdx, &relocs, int(off)) 520 m.typ = decodeRelocSym(ldr, symIdx, &relocs, int32(off+4)) 521 return m 522 } 523 524 // Decode the method name stored in symbol symIdx. The symbol should contain just the bytes of a method name. 525 func (d *deadcodePass) decodeGenericIfaceMethod(ldr *loader.Loader, symIdx loader.Sym) string { 526 return ldr.DataString(symIdx) 527 } 528 529 func (d *deadcodePass) decodetypeMethods(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs) []methodsig { 530 p := ldr.Data(symIdx) 531 if !decodetypeHasUncommon(arch, p) { 532 panic(fmt.Sprintf("no methods on %q", ldr.SymName(symIdx))) 533 } 534 off := commonsize(arch) // reflect.rtype 535 switch decodetypeKind(arch, p) & kindMask { 536 case kindStruct: // reflect.structType 537 off += 4 * arch.PtrSize 538 case kindPtr: // reflect.ptrType 539 off += arch.PtrSize 540 case kindFunc: // reflect.funcType 541 off += arch.PtrSize // 4 bytes, pointer aligned 542 case kindSlice: // reflect.sliceType 543 off += arch.PtrSize 544 case kindArray: // reflect.arrayType 545 off += 3 * arch.PtrSize 546 case kindChan: // reflect.chanType 547 off += 2 * arch.PtrSize 548 case kindMap: // reflect.mapType 549 off += 4*arch.PtrSize + 8 550 case kindInterface: // reflect.interfaceType 551 off += 3 * arch.PtrSize 552 default: 553 // just Sizeof(rtype) 554 } 555 556 mcount := int(decodeInuxi(arch, p[off+4:], 2)) 557 moff := int(decodeInuxi(arch, p[off+4+2+2:], 4)) 558 off += moff // offset to array of reflect.method values 559 const 
sizeofMethod = 4 * 4 // sizeof reflect.method in program 560 return d.decodeMethodSig(ldr, arch, symIdx, relocs, off, sizeofMethod, mcount) 561 }