// Source: github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/link/ld/deadcode.go

// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ld

import (
	"fmt"
	"strings"
	"unicode"

	"github.com/go-asm/go/buildcfg"
	"github.com/go-asm/go/cmd/goobj"
	"github.com/go-asm/go/cmd/link/loader"
	"github.com/go-asm/go/cmd/link/sym"
	"github.com/go-asm/go/cmd/objabi"
	"github.com/go-asm/go/cmd/sys"
)

var _ = fmt.Print

// deadcodePass holds the state of one dead code elimination pass:
// the work queue of symbols still to visit, the interface method
// signatures and names reached so far, and the methods of reached
// types that have not (yet) been marked.
type deadcodePass struct {
	ctxt *Link
	ldr  *loader.Loader
	wq   heap // work queue, using min-heap for better locality

	ifaceMethod        map[methodsig]bool // methods called from reached interface call sites
	genericIfaceMethod map[string]bool    // names of methods called from reached generic interface call sites
	markableMethods    []methodref        // methods of reached types
	reflectSeen        bool               // whether we have seen a reflect method call
	dynlink            bool               // cached result of ctxt.DynlinkingGo(), set in init

	methodsigstmp []methodsig  // scratch buffer for decoding method signatures
	pkginits      []loader.Sym // package init functions recorded by flood, consumed by mapinitcleanup
	mapinitnoop   loader.Sym   // the runtime.mapinitnoop symbol, looked up in init
}

// init allocates the pass's maps and seeds the work queue with the
// root symbols.
//
// When building a shared library every symbol defined in the library
// (plus mainInittasks) is a root and init returns early. Otherwise the
// roots are the entry symbol(s), runtime.unreachableMethod, the plugin
// symbols when building a plugin, all dynamic exports, and
// mainInittasks when it is set. It panics if runtime.mapinitnoop
// cannot be looked up.
func (d *deadcodePass) init() {
	d.ldr.InitReachable()
	d.ifaceMethod = make(map[methodsig]bool)
	d.genericIfaceMethod = make(map[string]bool)
	if buildcfg.Experiment.FieldTrack {
		d.ldr.Reachparent = make([]loader.Sym, d.ldr.NSym())
	}
	d.dynlink = d.ctxt.DynlinkingGo()

	if d.ctxt.BuildMode == BuildModeShared {
		// Mark all symbols defined in this library as reachable when
		// building a shared library.
		n := d.ldr.NDef()
		for i := 1; i < n; i++ {
			s := loader.Sym(i)
			d.mark(s, 0)
		}
		d.mark(d.ctxt.mainInittasks, 0)
		return
	}

	var names []string

	// In a normal binary, start at main.main and the init
	// functions and mark what is reachable from there.
	if d.ctxt.linkShared && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
		names = append(names, "main.main", "main..inittask")
	} else {
		// The external linker refers main symbol directly.
		// Note that this overwrites *flagEntrySymbol for the rest of the link.
		if d.ctxt.LinkMode == LinkExternal && (d.ctxt.BuildMode == BuildModeExe || d.ctxt.BuildMode == BuildModePIE) {
			if d.ctxt.HeadType == objabi.Hwindows && d.ctxt.Arch.Family == sys.I386 {
				*flagEntrySymbol = "_main"
			} else {
				*flagEntrySymbol = "main"
			}
		}
		names = append(names, *flagEntrySymbol)
	}
	// runtime.unreachableMethod is a function that will throw if called.
	// We redirect unreachable methods to it.
	names = append(names, "runtime.unreachableMethod")
	if d.ctxt.BuildMode == BuildModePlugin {
		names = append(names, objabi.PathToPrefix(*flagPluginPath)+"..inittask", objabi.PathToPrefix(*flagPluginPath)+".main", "go:plugin.tabs")

		// We don't keep the go:plugin.exports symbol,
		// but we do keep the symbols it refers to.
		exportsIdx := d.ldr.Lookup("go:plugin.exports", 0)
		if exportsIdx != 0 {
			relocs := d.ldr.Relocs(exportsIdx)
			for i := 0; i < relocs.Count(); i++ {
				d.mark(relocs.At(i).Sym(), 0)
			}
		}
	}

	if d.ctxt.Debugvlog > 1 {
		d.ctxt.Logf("deadcode start names: %v\n", names)
	}

	for _, name := range names {
		// Mark symbol as a data/ABI0 symbol.
		d.mark(d.ldr.Lookup(name, 0), 0)
		if abiInternalVer != 0 {
			// Also mark any Go functions (internal ABI).
			d.mark(d.ldr.Lookup(name, abiInternalVer), 0)
		}
	}

	// All dynamic exports are roots.
	for _, s := range d.ctxt.dynexp {
		if d.ctxt.Debugvlog > 1 {
			d.ctxt.Logf("deadcode start dynexp: %s<%d>\n", d.ldr.SymName(s), d.ldr.SymVersion(s))
		}
		d.mark(s, 0)
	}

	d.mapinitnoop = d.ldr.Lookup("runtime.mapinitnoop", abiInternalVer)
	if d.mapinitnoop == 0 {
		panic("could not look up runtime.mapinitnoop")
	}
	if d.ctxt.mainInittasks != 0 {
		d.mark(d.ctxt.mainInittasks, 0)
	}
}

// flood drains the work queue. For every popped symbol it walks the
// symbol's relocations and aux symbols and marks what they reference,
// handling the marker relocations (R_METHODOFF, R_USEIFACE,
// R_USEIFACEMETHOD, R_USENAMEDMETHOD, ...) specially. Along the way it
// records package init functions in d.pkginits and, for types that are
// used in an interface, appends their method references to
// d.markableMethods so that deadcode can decide later whether to keep
// them.
func (d *deadcodePass) flood() {
	// methods is reused across iterations; it collects the R_METHODOFF
	// relocations of the symbol currently being visited.
	var methods []methodref
	for !d.wq.empty() {
		symIdx := d.wq.pop()

		// Methods may be called via reflection. Give up on static analysis,
		// and mark all exported methods of all reachable types as reachable.
		d.reflectSeen = d.reflectSeen || d.ldr.IsReflectMethod(symIdx)

		isgotype := d.ldr.IsGoType(symIdx)
		relocs := d.ldr.Relocs(symIdx)
		var usedInIface bool

		if isgotype {
			if d.dynlink {
				// When dynamic linking, a type may be passed across DSO
				// boundary and get converted to interface at the other side.
				d.ldr.SetAttrUsedInIface(symIdx, true)
			}
			usedInIface = d.ldr.AttrUsedInIface(symIdx)
		}

		methods = methods[:0]
		for i := 0; i < relocs.Count(); i++ {
			r := relocs.At(i)
			if r.Weak() {
				convertWeakToStrong := false
				// When building with "-linkshared", we can't tell if the
				// interface method in itab will be used or not.
				// Ignore the weak attribute.
				if d.ctxt.linkShared && d.ldr.IsItab(symIdx) {
					convertWeakToStrong = true
				}
				// If the program uses plugins, we can no longer treat
				// relocs from pkg init functions to outlined map init
				// fragments as weak, since doing so can cause package
				// init clashes between the main program and the
				// plugin. See #62430 for more details.
				if d.ctxt.canUsePlugins && r.Type().IsDirectCall() {
					convertWeakToStrong = true
				}
				if !convertWeakToStrong {
					// skip this reloc
					continue
				}
			}
			t := r.Type()
			switch t {
			case objabi.R_METHODOFF:
				if i+2 >= relocs.Count() {
					panic("expect three consecutive R_METHODOFF relocs")
				}
				if usedInIface {
					methods = append(methods, methodref{src: symIdx, r: i})
					// The method descriptor is itself a type descriptor, and
					// it can be used to reach other types, e.g. by using
					// reflect.Type.Method(i).Type.In(j). We need to traverse
					// its child types with UsedInIface set. (See also the
					// comment below.)
					rs := r.Sym()
					if !d.ldr.AttrUsedInIface(rs) {
						d.ldr.SetAttrUsedInIface(rs, true)
						if d.ldr.AttrReachable(rs) {
							d.ldr.SetAttrReachable(rs, false)
							d.mark(rs, symIdx)
						}
					}
				}
				// Skip the other two relocations of this method triple.
				i += 2
				continue
			case objabi.R_USETYPE:
				// type symbol used for DWARF. we need to load the symbol but it may not
				// be otherwise reachable in the program.
				// do nothing for now as we still load all type symbols.
				continue
			case objabi.R_USEIFACE:
				// R_USEIFACE is a marker relocation that tells the linker the type is
				// converted to an interface, i.e. should have UsedInIface set. See the
				// comment below for why we need to unset the Reachable bit and re-mark it.
				rs := r.Sym()
				if d.ldr.IsItab(rs) {
					// This relocation can also point at an itab, in which case it
					// means "the _type field of that itab".
					rs = decodeItabType(d.ldr, d.ctxt.Arch, rs)
				}
				if !d.ldr.IsGoType(rs) && !d.ctxt.linkShared {
					panic(fmt.Sprintf("R_USEIFACE in %s references %s which is not a type or itab", d.ldr.SymName(symIdx), d.ldr.SymName(rs)))
				}
				if !d.ldr.AttrUsedInIface(rs) {
					d.ldr.SetAttrUsedInIface(rs, true)
					if d.ldr.AttrReachable(rs) {
						d.ldr.SetAttrReachable(rs, false)
						d.mark(rs, symIdx)
					}
				}
				continue
			case objabi.R_USEIFACEMETHOD:
				// R_USEIFACEMETHOD is a marker relocation that marks an interface
				// method as used.
				rs := r.Sym()
				if d.ctxt.linkShared && (d.ldr.SymType(rs) == sym.SDYNIMPORT || d.ldr.SymType(rs) == sym.Sxxx) {
					// Don't decode symbol from shared library (we'll mark all exported methods anyway).
					// We check for both SDYNIMPORT and Sxxx because name-mangled symbols haven't
					// been resolved at this point.
					continue
				}
				m := d.decodeIfaceMethod(d.ldr, d.ctxt.Arch, rs, r.Add())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached iface method: %v\n", m)
				}
				d.ifaceMethod[m] = true
				continue
			case objabi.R_USENAMEDMETHOD:
				// R_USENAMEDMETHOD records, by name only, a method that is
				// used; the referenced symbol's data is the method name.
				name := d.decodeGenericIfaceMethod(d.ldr, r.Sym())
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("reached generic iface method: %s\n", name)
				}
				d.genericIfaceMethod[name] = true
				continue // don't mark referenced symbol - it is not needed in the final binary.
			case objabi.R_INITORDER:
				// inittasks has already run, so any R_INITORDER links are now
				// superfluous - the only live inittask records are those which are
				// in a scheduled list somewhere (e.g. runtime.moduledata.inittasks).
				continue
			}
			rs := r.Sym()
			if isgotype && usedInIface && d.ldr.IsGoType(rs) && !d.ldr.AttrUsedInIface(rs) {
				// If a type is converted to an interface, it is possible to obtain an
				// interface with a "child" type of it using reflection (e.g. obtain an
				// interface of T from []chan T). We need to traverse its "child" types
				// with UsedInIface attribute set.
				// When visiting the child type (chan T in the example above), it will
				// have UsedInIface set, so it in turn will mark and (re)visit its children
				// (e.g. T above).
				// We unset the reachable bit here, so if the child type is already visited,
				// it will be visited again.
				// Note that a type symbol can be visited at most twice, one without
				// UsedInIface and one with. So termination is still guaranteed.
				d.ldr.SetAttrUsedInIface(rs, true)
				d.ldr.SetAttrReachable(rs, false)
			}
			d.mark(rs, symIdx)
		}
		naux := d.ldr.NAux(symIdx)
		for i := 0; i < naux; i++ {
			a := d.ldr.Aux(symIdx, i)
			if a.Type() == goobj.AuxGotype {
				// A symbol being reachable doesn't imply we need its
				// type descriptor. Don't mark it.
				continue
			}
			d.mark(a.Sym(), symIdx)
		}
		// Record sym if package init func (here naux != 0 is a cheap way
		// to check first if it is a function symbol).
		if naux != 0 && d.ldr.IsPkgInit(symIdx) {

			d.pkginits = append(d.pkginits, symIdx)
		}
		// Some host object symbols have an outer object, which acts like a
		// "carrier" symbol, or it holds all the symbols for a particular
		// section. We need to mark all "referenced" symbols from that carrier,
		// so we make sure we're pulling in all outer symbols, and their sub
		// symbols. This is not ideal, and these carrier/section symbols could
		// be removed.
		if d.ldr.IsExternal(symIdx) {
			d.mark(d.ldr.OuterSym(symIdx), symIdx)
			d.mark(d.ldr.SubSym(symIdx), symIdx)
		}

		if len(methods) != 0 {
			if !isgotype {
				panic("method found on non-type symbol")
			}
			// Decode runtime type information for type methods
			// to help work out which methods can be called
			// dynamically via interfaces.
			methodsigs := d.decodetypeMethods(d.ldr, d.ctxt.Arch, symIdx, &relocs)
			if len(methods) != len(methodsigs) {
				panic(fmt.Sprintf("%q has %d method relocations for %d methods", d.ldr.SymName(symIdx), len(methods), len(methodsigs)))
			}
			for i, m := range methodsigs {
				methods[i].m = m
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("markable method: %v of sym %v %s\n", m, symIdx, d.ldr.SymName(symIdx))
				}
			}
			d.markableMethods = append(d.markableMethods, methods...)
		}
	}
}

// mapinitcleanup walks all pkg init functions and looks for weak relocations
// to mapinit symbols that are no longer reachable. It rewrites
// the relocs to target a new no-op routine in the runtime.
//
// It panics if such a relocation targets a symbol whose name does not
// contain "map.init".
func (d *deadcodePass) mapinitcleanup() {
	for _, idx := range d.pkginits {
		relocs := d.ldr.Relocs(idx)
		// su is created lazily, only once a relocation of idx actually
		// needs to be rewritten.
		var su *loader.SymbolBuilder
		for i := 0; i < relocs.Count(); i++ {
			r := relocs.At(i)
			rs := r.Sym()
			if r.Weak() && r.Type().IsDirectCall() && !d.ldr.AttrReachable(rs) {
				// double check to make sure target is indeed map.init
				rsn := d.ldr.SymName(rs)
				if !strings.Contains(rsn, "map.init") {
					panic(fmt.Sprintf("internal error: expected map.init sym for weak call reloc, got %s -> %s", d.ldr.SymName(idx), rsn))
				}
				d.ldr.SetAttrReachable(d.mapinitnoop, true)
				if d.ctxt.Debugvlog > 1 {
					d.ctxt.Logf("deadcode: %s rewrite %s ref to %s\n",
						d.ldr.SymName(idx), rsn,
						d.ldr.SymName(d.mapinitnoop))
				}
				if su == nil {
					su = d.ldr.MakeSymbolUpdater(idx)
				}
				su.SetRelocSym(i, d.mapinitnoop)
			}
		}
	}
}

// mark sets symIdx reachable and pushes it on the work queue, unless
// symIdx is 0 or is already reachable. parent is the symbol through
// which symIdx was reached (0 for a root).
//
// With the FieldTrack experiment enabled, parent is recorded in
// Reachparent if no parent has been recorded yet. With *flagDumpDep
// set, a "from -> to" line is printed to standard output for named
// symbols, using "_" as the source when parent is 0.
func (d *deadcodePass) mark(symIdx, parent loader.Sym) {
	if symIdx != 0 && !d.ldr.AttrReachable(symIdx) {
		d.wq.push(symIdx)
		d.ldr.SetAttrReachable(symIdx, true)
		if buildcfg.Experiment.FieldTrack && d.ldr.Reachparent[symIdx] == 0 {
			d.ldr.Reachparent[symIdx] = parent
		}
		if *flagDumpDep {
			to := d.ldr.SymName(symIdx)
			if to != "" {
				to = d.dumpDepAddFlags(to, symIdx)
				from := "_"
				if parent != 0 {
					from = d.ldr.SymName(parent)
					from = d.dumpDepAddFlags(from, parent)
				}
				fmt.Printf("%s -> %s\n", from, to)
			}
		}
	}
}

// dumpDepAddFlags returns name decorated for dependency dump output:
// name, a space, and then "<UsedInIface>" and/or "<ReflectMethod>"
// for whichever of those properties symIdx has. If symIdx has neither,
// name is returned unchanged.
func (d *deadcodePass) dumpDepAddFlags(name string, symIdx loader.Sym) string {
	var flags strings.Builder
	if d.ldr.AttrUsedInIface(symIdx) {
		flags.WriteString("<UsedInIface>")
	}
	if d.ldr.IsReflectMethod(symIdx) {
		flags.WriteString("<ReflectMethod>")
	}
	if flags.Len() > 0 {
		return name + " " + flags.String()
	}
	return name
}

// markMethod marks the targets of the three consecutive relocations
// of m.src starting at index m.r, with m.src as their parent.
func (d *deadcodePass) markMethod(m methodref) {
	relocs := d.ldr.Relocs(m.src)
	d.mark(relocs.At(m.r).Sym(), m.src)
	d.mark(relocs.At(m.r+1).Sym(), m.src)
	d.mark(relocs.At(m.r+2).Sym(), m.src)
}

// deadcode marks all reachable symbols.
//
// The basis of the dead code elimination is a flood fill of symbols,
// following their relocations, beginning at *flagEntrySymbol.
//
// This flood fill is wrapped in logic for pruning unused methods.
// All methods are mentioned by relocations on their receiver's *rtype.
// These relocations are specially defined as R_METHODOFF by the compiler
// so we can detect and manipulate them here.
//
// There are three ways a method of a reachable type can be invoked:
//
//  1. direct call
//  2. through a reachable interface type
//  3. reflect.Value.Method (or MethodByName), or reflect.Type.Method
//     (or MethodByName)
//
// The first case is handled by the flood fill, a directly called method
// is marked as reachable.
//
// The second case is handled by decomposing all reachable interface
// types into method signatures. Each encountered method is compared
// against the interface method signatures, if it matches it is marked
// as reachable. This is extremely conservative, but easy and correct.
//
// The third case is handled by looking for functions that compiler flagged
// as REFLECTMETHOD. REFLECTMETHOD on a function F means that F does a method
// lookup with reflection, but the compiler was not able to statically determine
// the method name.
//
// All functions that call reflect.Value.Method or reflect.Type.Method are REFLECTMETHODs.
// Functions that call reflect.Value.MethodByName or reflect.Type.MethodByName with
// a non-constant argument are REFLECTMETHODs, too. If we find a REFLECTMETHOD,
// we give up on static analysis, and mark all exported methods of all reachable
// types as reachable.
//
// If the argument to MethodByName is a compile-time constant, the compiler
// emits a relocation with the method name. Matching methods are kept in all
// reachable types.
//
// Any unreached text symbols are removed from ctxt.Textp.
func deadcode(ctxt *Link) {
	ldr := ctxt.loader
	d := deadcodePass{ctxt: ctxt, ldr: ldr}
	d.init()
	d.flood()

	if ctxt.DynlinkingGo() {
		// Exported methods may satisfy interfaces we don't know
		// about yet when dynamically linking.
		d.reflectSeen = true
	}

	for {
		// Mark all methods that could satisfy a discovered
		// interface as reachable. We recheck old marked interfaces
		// as new types (with new methods) may have been discovered
		// in the last pass.
		// rem filters d.markableMethods in place, keeping only the
		// methods that were not marked in this round.
		rem := d.markableMethods[:0]
		for _, m := range d.markableMethods {
			if (d.reflectSeen && (m.isExported() || d.dynlink)) || d.ifaceMethod[m.m] || d.genericIfaceMethod[m.m.name] {
				d.markMethod(m)
			} else {
				rem = append(rem, m)
			}
		}
		d.markableMethods = rem

		if d.wq.empty() {
			// No new work was discovered. Done.
			break
		}
		d.flood()
	}
	if *flagPruneWeakMap {
		d.mapinitcleanup()
	}
}

// methodsig is a typed method signature (name + type).
469 type methodsig struct { 470 name string 471 typ loader.Sym // type descriptor symbol of the function 472 } 473 474 // methodref holds the relocations from a receiver type symbol to its 475 // method. There are three relocations, one for each of the fields in 476 // the reflect.method struct: mtyp, ifn, and tfn. 477 type methodref struct { 478 m methodsig 479 src loader.Sym // receiver type symbol 480 r int // the index of R_METHODOFF relocations 481 } 482 483 func (m methodref) isExported() bool { 484 for _, r := range m.m.name { 485 return unicode.IsUpper(r) 486 } 487 panic("methodref has no signature") 488 } 489 490 // decodeMethodSig decodes an array of method signature information. 491 // Each element of the array is size bytes. The first 4 bytes is a 492 // nameOff for the method name, and the next 4 bytes is a typeOff for 493 // the function type. 494 // 495 // Conveniently this is the layout of both runtime.method and runtime.imethod. 496 func (d *deadcodePass) decodeMethodSig(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs, off, size, count int) []methodsig { 497 if cap(d.methodsigstmp) < count { 498 d.methodsigstmp = append(d.methodsigstmp[:0], make([]methodsig, count)...) 499 } 500 var methods = d.methodsigstmp[:count] 501 for i := 0; i < count; i++ { 502 methods[i].name = decodetypeName(ldr, symIdx, relocs, off) 503 methods[i].typ = decodeRelocSym(ldr, symIdx, relocs, int32(off+4)) 504 off += size 505 } 506 return methods 507 } 508 509 // Decode the method of interface type symbol symIdx at offset off. 
510 func (d *deadcodePass) decodeIfaceMethod(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, off int64) methodsig { 511 p := ldr.Data(symIdx) 512 if p == nil { 513 panic(fmt.Sprintf("missing symbol %q", ldr.SymName(symIdx))) 514 } 515 if decodetypeKind(arch, p)&kindMask != kindInterface { 516 panic(fmt.Sprintf("symbol %q is not an interface", ldr.SymName(symIdx))) 517 } 518 relocs := ldr.Relocs(symIdx) 519 var m methodsig 520 m.name = decodetypeName(ldr, symIdx, &relocs, int(off)) 521 m.typ = decodeRelocSym(ldr, symIdx, &relocs, int32(off+4)) 522 return m 523 } 524 525 // Decode the method name stored in symbol symIdx. The symbol should contain just the bytes of a method name. 526 func (d *deadcodePass) decodeGenericIfaceMethod(ldr *loader.Loader, symIdx loader.Sym) string { 527 return ldr.DataString(symIdx) 528 } 529 530 func (d *deadcodePass) decodetypeMethods(ldr *loader.Loader, arch *sys.Arch, symIdx loader.Sym, relocs *loader.Relocs) []methodsig { 531 p := ldr.Data(symIdx) 532 if !decodetypeHasUncommon(arch, p) { 533 panic(fmt.Sprintf("no methods on %q", ldr.SymName(symIdx))) 534 } 535 off := commonsize(arch) // reflect.rtype 536 switch decodetypeKind(arch, p) & kindMask { 537 case kindStruct: // reflect.structType 538 off += 4 * arch.PtrSize 539 case kindPtr: // reflect.ptrType 540 off += arch.PtrSize 541 case kindFunc: // reflect.funcType 542 off += arch.PtrSize // 4 bytes, pointer aligned 543 case kindSlice: // reflect.sliceType 544 off += arch.PtrSize 545 case kindArray: // reflect.arrayType 546 off += 3 * arch.PtrSize 547 case kindChan: // reflect.chanType 548 off += 2 * arch.PtrSize 549 case kindMap: // reflect.mapType 550 off += 4*arch.PtrSize + 8 551 case kindInterface: // reflect.interfaceType 552 off += 3 * arch.PtrSize 553 default: 554 // just Sizeof(rtype) 555 } 556 557 mcount := int(decodeInuxi(arch, p[off+4:], 2)) 558 moff := int(decodeInuxi(arch, p[off+4+2+2:], 4)) 559 off += moff // offset to array of reflect.method values 560 const 
sizeofMethod = 4 * 4 // sizeof reflect.method in program 561 return d.decodeMethodSig(ldr, arch, symIdx, relocs, off, sizeofMethod, mcount) 562 }