github.com/alexanius/gollvm12@v0.0.0-20230419200121-b152358b84f3/passes/GoStatepoints.cpp

//===- GoStatepoints.cpp - Insert statepoints for Go GC -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Rewrite call/invoke instructions so as to record live variables on the
// stack for use by the garbage collector.
//
//===----------------------------------------------------------------------===//

#include "CallingConv.h"
#include "GoStatepoints.h"
#include "GoStackMap.h"
#include "GollvmPasses.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <set>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "go-statepoints"

using namespace llvm;
using namespace gollvm::passes;

// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("gogc-print-liveset", cl::Hidden,
                                  cl::init(false));

// Print the liveset only for the specified function.
static cl::opt<std::string> PrintFunc("gogc-print-func", cl::Hidden,
                                      cl::init(""));

// At each statepoint, clobber all the stack slots that are considered
// dead, for debugging purposes.
static cl::opt<bool> ClobberNonLive("gogc-clobber-non-live",
                                    cl::Hidden, cl::init(false));
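// As a rough illustration of what this pass does (schematic only; the exact
// operand layout is whatever IRBuilder::CreateGCStatepointInvoke emits, and
// the names here are hypothetical), an input invoke such as
//
//   %r = invoke i8* @foo(i8* %p)
//           to label %cont unwind label %pad
//
// with a pointer %q live across it is rewritten along the lines of
//
//   %statepoint_token = invoke token @llvm.experimental.gc.statepoint(
//           i64 <ID>, i32 0, i8* (i8*)* @foo, ..., i8* %p, ..., i8* %q, ...)
//           to label %cont unwind label %pad
//   ...
//   %r = call i8* @llvm.experimental.gc.result(token %statepoint_token)
//
// so that the stack map emitted for the statepoint records where the live
// pointers such as %q can be found at runtime.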
// Statepoint ID. TODO: this is not thread safe.
static uint64_t ID = 0;

/// The IR fed into this pass may have had attributes and
/// metadata implying dereferenceability that are no longer valid/correct after
/// this pass has run. This is because, semantically, after
/// this pass runs, all calls to gc.statepoint "free" the entire
/// heap. stripNonValidData (conservatively) restores
/// correctness by erasing all attributes in the module that externally imply
/// dereferenceability. Similar reasoning also applies to the noalias
/// attributes and metadata. gc.statepoint can touch the entire heap including
/// noalias objects.
/// Apart from attributes and metadata, we also remove instructions that imply
/// constant physical memory: llvm.invariant.start.
//
// TODO: revisit this. For a non-moving GC some attributes may still be valid.
// It probably doesn't really matter, as we run this pass at the end of the
// optimization pipeline.
static void stripNonValidData(Module &M);

static bool shouldRewriteStatepointsIn(Function &F);

PreservedAnalyses GoStatepoints::run(Module &M,
                                     ModuleAnalysisManager &AM) {
  // Create a sentinel global variable for stack maps.
  Type *Int64Ty = Type::getInt64Ty(M.getContext());
  new GlobalVariable(M, Int64Ty, /* isConstant */ true,
                     GlobalValue::InternalLinkage,
                     ConstantInt::get(Int64Ty, GO_FUNC_SENTINEL),
                     GO_FUNC_SYM);

  bool Changed = false;
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M) {
    // Nothing to do for declarations.
    if (F.isDeclaration() || F.empty())
      continue;

    // Policy choice says not to rewrite - the most common reason is that we're
    // compiling code without a GCStrategy.
    if (!shouldRewriteStatepointsIn(F))
      continue;

    auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
    auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    Changed |= runOnFunction(F, DT, TTI, TLI);
  }
  if (!Changed)
    return PreservedAnalyses::all();

  // stripNonValidData asserts that shouldRewriteStatepointsIn
  // returns true for at least one function in the module. Since at least
  // one function changed, we know that the precondition is satisfied.
  stripNonValidData(M);

  PreservedAnalyses PA;
  PA.preserve<TargetIRAnalysis>();
  PA.preserve<TargetLibraryAnalysis>();
  return PA;
}

namespace {

class GoStatepointsLegacyPass : public ModulePass {
  GoStatepoints Impl;

public:
  static char ID; // Pass identification, replacement for typeid

  GoStatepointsLegacyPass() : ModulePass(ID), Impl() {
    initializeGoStatepointsLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override {
    // Create a sentinel global variable for stack maps.
    Type *Int64Ty = Type::getInt64Ty(M.getContext());
    new GlobalVariable(M, Int64Ty, /* isConstant */ true,
                       GlobalValue::InternalLinkage,
                       ConstantInt::get(Int64Ty, GO_FUNC_SENTINEL),
                       GO_FUNC_SYM);

    bool Changed = false;
    for (Function &F : M) {
      // Nothing to do for declarations.
      if (F.isDeclaration() || F.empty())
        continue;

      // Policy choice says not to rewrite - the most common reason is that
      // we're compiling code without a GCStrategy.
      if (!shouldRewriteStatepointsIn(F))
        continue;

      const TargetLibraryInfo &TLI =
          getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);

      TargetTransformInfo &TTI =
          getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
      auto &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();

      Changed |= Impl.runOnFunction(F, DT, TTI, TLI);
    }

    if (!Changed)
      return false;

    // stripNonValidData asserts that shouldRewriteStatepointsIn
    // returns true for at least one function in the module. Since at least
    // one function changed, we know that the precondition is satisfied.
    stripNonValidData(M);
    return true;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // We add and rewrite a bunch of instructions, but don't really do much
    // else. We could in theory preserve a lot more analyses here.
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }
};

} // end anonymous namespace

char GoStatepointsLegacyPass::ID = 0;

ModulePass *llvm::createGoStatepointsLegacyPass() {
  return new GoStatepointsLegacyPass();
}

INITIALIZE_PASS_BEGIN(GoStatepointsLegacyPass,
                      "go-statepoints",
                      "Insert statepoints for Go GC", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(GoStatepointsLegacyPass,
                    "go-statepoints",
                    "Insert statepoints for Go GC", false, false)

namespace {

// Liveness tracking has three parts: in-register values, non-address-taken
// allocas (stack slots), and address-taken allocas.
//
// In-register values are live from the point they are defined until their
// last use. At statepoints they are spilled so the runtime can find them on
// the stack.
//
// Non-address-taken allocas are live from the point they are initialized
// until their last use. At statepoints they are not spilled, but their stack
// locations are recorded.
//
// Address-taken allocas are live from the point they are initialized and
// remain live thereafter, unless an explicit lifetime.end is seen. As above,
// they are not spilled at statepoints but have their stack locations
// recorded.
//
// In the data structure, there is some overlap. The live sets are used
// for both in-register values and non-address-taken allocas, but not
// for address-taken allocas. The alloca def/kill sets are used for both
// kinds of allocas.
struct GCPtrLivenessData {
  // In-register value and non-address-taken alloca.

  /// Values defined in this block.
  MapVector<BasicBlock *, SetVector<Value *>> KillSet;
  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  MapVector<BasicBlock *, SetVector<Value *>> LiveSet;
  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  MapVector<BasicBlock *, SetVector<Value *>> LiveIn;
  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  MapVector<BasicBlock *, SetVector<Value *>> LiveOut;
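
  // The backward sets above are related by the usual liveness dataflow
  // equations (sketched here for orientation only; the actual propagation is
  // done in computeLiveInValues, declared below):
  //
  //   LiveIn[BB]  = LiveSet[BB] ∪ (LiveOut[BB] \ KillSet[BB])
  //   LiveOut[BB] = ∪ LiveIn[Succ]   for each successor Succ of BB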

  // Alloca liveness.

  MapVector<BasicBlock *, SetVector<Value *>> AllocaDefSet;  // initialized in BB
  MapVector<BasicBlock *, SetVector<Value *>> AllocaKillSet; // killed (lifetime.end) in BB

  // Unlike the sets above, these are propagated forwards, instead of backwards.
  MapVector<BasicBlock *, SetVector<Value *>> AllocaDefAny; // initialized on any path reaching the end of BB
  MapVector<BasicBlock *, SetVector<Value *>> AllocaDefAll; // initialized on all paths reaching the end of BB
};

// The type of the internal cache used inside the findBasePointers family
// of functions. From the caller's perspective, this is an opaque type and
// should not be inspected.
//
// In the actual implementation this caches two relations:
// - The base relation itself (i.e. this pointer is based on that one)
// - The base defining value relation (i.e. before base_phi insertion)
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all the second type to the first type.
using DefiningValueMapTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
    MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;

struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// Mapping from live pointers to a base-defining-value
  MapVector<Value *, Value *> PointerToBase;

  /// The *new* gc.statepoint instruction itself. This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  GCStatepointInst *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values that are rematerialized instead of relocated.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};

} // end anonymous namespace

/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data,
                                SetVector<Value *> &AddrTakenAllocas,
                                SetVector<Value *> &ToZero,
                                SetVector<Value *> &BadLoads,
                                DefiningValueMapTy &DVCache);

/// Given results from the dataflow liveness computation, find the set of live
/// values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                              SetVector<Value *> &AddrTakenAllocas,
                              StatepointLiveSetTy &out,
                              SetVector<Value *> &AllAllocas,
                              DefiningValueMapTy &DVCache);

// TODO: Once we can get to the GCStrategy, this becomes
// Optional<bool> isGCManagedPointer(const Type *Ty) const override {

static bool isGCPointerType(Type *T) {
  return isa<PointerType>(T);
}

// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) and not (b)
// is not encountered.)
static bool isHandledGCPointerType(Type *T) {
  // We fully support gc pointers
  if (isGCPointerType(T))
    return true;
  // We partially support vectors of gc pointers. The code will assert if it
  // can't handle something.
  if (auto VT = dyn_cast<VectorType>(T))
    if (isGCPointerType(VT->getElementType()))
      return true;
  // FCA is supported.
  if (T->isStructTy())
    return hasPointer(T);
  return false;
}

#ifndef NDEBUG
/// Returns true if this type contains a gc pointer whether we know how to
/// handle that type or not.
static bool containsGCPtrType(Type *Ty) {
  if (isGCPointerType(Ty))
    return true;
  if (VectorType *VT = dyn_cast<VectorType>(Ty))
    return isGCPointerType(VT->getScalarType());
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
    return containsGCPtrType(AT->getElementType());
  if (StructType *ST = dyn_cast<StructType>(Ty))
    return llvm::any_of(ST->subtypes(), containsGCPtrType);
  return false;
}

// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
static bool isUnhandledGCPointerType(Type *Ty) {
  return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
}
#endif

// Return the name of the value suffixed with the provided suffix, or, if the
// value didn't have a name, the specified default name.
static std::string suffixed_name_or(Value *V, StringRef Suffix,
                                    StringRef DefaultName) {
  return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
}

// Helper function to print a live set, for debugging.
static void
printLiveSet(SetVector<Value *> &LiveSet) {
  for (Value *V : LiveSet)
    dbgs() << "\t" << *V << "\n";
}

// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
static void
analyzeParsePointLiveness(DominatorTree &DT,
                          GCPtrLivenessData &OriginalLivenessData,
                          SetVector<Value *> &AddrTakenAllocas, CallBase *Call,
                          PartiallyConstructedSafepointRecord &Result,
                          SetVector<Value *> &AllAllocas,
                          DefiningValueMapTy &DVCache) {
  StatepointLiveSetTy LiveSet;
  findLiveSetAtInst(Call, OriginalLivenessData, AddrTakenAllocas,
                    LiveSet, AllAllocas, DVCache);

  if (PrintLiveSet) {
    dbgs() << "Live Variables at " << *Call << ":\n";
    printLiveSet(LiveSet);
  }
  Result.LiveSet = LiveSet;
}

static bool isKnownBaseResult(Value *V);

namespace {

/// A single base defining value - An immediate base defining value for an
/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
/// For instructions which have multiple pointer [vector] inputs or that
/// transition between vector and scalar types, there is no immediate base
/// defining value. The 'base defining value' for 'Def' is the transitive
/// closure of this relation stopping at the first instruction which has no
/// immediate base defining value. The b.d.v. might itself be a base pointer,
/// but it can also be an arbitrary derived pointer.
struct BaseDefiningValueResult {
  /// Contains the value which is the base defining value.
  Value * const BDV;

  /// True if the base defining value is also known to be an actual base
  /// pointer.
  const bool IsKnownBase;

  BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
      : BDV(BDV), IsKnownBase(IsKnownBase) {
#ifndef NDEBUG
    // Check consistency between new and old means of checking whether a BDV is
    // a base.
    bool MustBeBase = isKnownBaseResult(BDV);
    assert(!MustBeBase || MustBeBase == IsKnownBase);
#endif
  }
};

} // end anonymous namespace

static BaseDefiningValueResult findBaseDefiningValue(Value *I);

/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
/// 'I'. As an optimization, this method will try to determine when the
/// element is known to already be a base pointer. If this can be established,
/// the second value in the returned pair will be true. Note that either a
/// vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
static BaseDefiningValueResult
findBaseDefiningValueOfVector(Value *I) {
  // Each case parallels findBaseDefiningValue below, see that code for
  // detailed motivation.

  if (isa<Argument>(I))
    // An incoming argument to the function is a base pointer
    return BaseDefiningValueResult(I, true);

  if (isa<Constant>(I))
    // The base of a constant vector consists only of constant null pointers.
    // For reasoning see the similar case inside 'findBaseDefiningValue'.
    return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
                                   true);

  if (isa<LoadInst>(I))
    return BaseDefiningValueResult(I, true);

  if (isa<InsertElementInst>(I))
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    return BaseDefiningValueResult(I, false);

  if (isa<ShuffleVectorInst>(I))
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    // TODO: There are a number of local optimizations which could be applied
    // here for particular shufflevector patterns.
    return BaseDefiningValueResult(I, false);

  // The behavior of getelementptr instructions is the same for vector and
  // non-vector data types.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
    return findBaseDefiningValue(GEP->getPointerOperand());

  // If the pointer comes through a bitcast of a vector of pointers to
  // a vector of another type of pointer, then look through the bitcast
  if (auto *BC = dyn_cast<BitCastInst>(I))
    return findBaseDefiningValue(BC->getOperand(0));

  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I))
    return BaseDefiningValueResult(I, true);

  // A PHI or Select is a base defining value. The outer findBasePointer
  // algorithm is responsible for constructing a base value for this BDV.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "unknown vector instruction - no base found for vector element");
  return BaseDefiningValueResult(I, false);
}

/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
  if (I->getType()->isStructTy())
    // Assume an FCA is always a base.
    // FCAs appear mostly in the call sequence where we pass/return multiple
    // values in registers, e.g. { i8*, i64 }. If it contains the address of
    // an alloca, the alloca should already be address taken (at least when
    // creating the FCA), so we don't need to link the FCA back to the alloca.
    // It is also unlikely to contain a past-the-end pointer (we cannot do
    // pointer arithmetic directly with an FCA). So it is safe to treat an FCA
    // as a base.
    return BaseDefiningValueResult(I, true);

  assert(I->getType()->isPtrOrPtrVectorTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");

  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I);

  if (isa<Argument>(I))
    // An incoming argument to the function is a base pointer.
    // We should never have reached here if this argument isn't a gc value.
    return BaseDefiningValueResult(I, true);

  if (isa<Constant>(I)) {
    // We assume that objects with a constant base (e.g. a global) can't move
    // and don't need to be reported to the collector because they are always
    // live. Besides global references, all kinds of constants (e.g. undef,
    // constant expressions, null pointers) can be introduced by the inliner or
    // the optimizer, especially on dynamically dead paths.
    // Here we treat all of them as having a single null base. By doing this we
    // try to avoid problems reporting various conflicts in the form of
    // "phi (const1, const2)" or "phi (const, regular gc ptr)".
    // See constant.ll file for relevant test cases.

    return BaseDefiningValueResult(
        ConstantPointerNull::get(cast<PointerType>(I->getType())), true);
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    if (isa<IntToPtrInst>(Def))
      // A pointer converted from an integer is a base.
      return BaseDefiningValueResult(Def, true);

    // Pointer-to-pointer and int-to-pointer casts are handled above.
    // We don't know how to handle other kinds of casts.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    return findBaseDefiningValue(Def);
  }

  if (isa<AllocaInst>(I))
    // An alloca is a gc base.
    return BaseDefiningValueResult(I, true);

  if (isa<LoadInst>(I))
    // The value loaded is a gc base itself.
    return BaseDefiningValueResult(I, true);

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
    // The base of this GEP is the base.
    return findBaseDefiningValue(GEP->getPointerOperand());

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
      llvm_unreachable("statepoints don't produce pointers");
    case Intrinsic::experimental_gc_relocate:
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported. It could probably be made to work,
      // but why are you doing this? There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    }
  }
  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I))
    return BaseDefiningValueResult(I, true);

  // TODO: I have absolutely no idea how to implement this part yet. It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I))
    // A CAS is effectively an atomic store and load combined under a
    // predicate. From the perspective of base pointers, we just treat it
    // like a load.
    return BaseDefiningValueResult(I, true);

  assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
                                   "binary ops which don't apply to pointers");

  // The aggregate ops. Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load. As a result,
  // this defines the base just like a load does.
  if (isa<ExtractValueInst>(I))
    return BaseDefiningValueResult(I, true);

  // We should never see an insertvalue since that would require we be
  // tracing back a struct value not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // An extractelement produces a base result exactly when its input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
  if (isa<ExtractElementInst>(I))
    // Note: There are a lot of obvious peephole cases here. These are
    // deliberately handled after the main base pointer inference algorithm to
    // make writing test cases to exercise that code easier.
    return BaseDefiningValueResult(I, false);

  // The last two cases here don't return a base pointer. Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each potentially with its own base). It's the job of
  // the caller to resolve these.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "missing instruction case in findBaseDefiningValue");
  return BaseDefiningValueResult(I, false);
}

/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
  Value *&Cached = Cache[I];
  if (!Cached) {
    Cached = findBaseDefiningValue(I).BDV;
    LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
                      << Cached->getName() << "\n");
  }
  assert(Cache[I] != nullptr);
  return Cached;
}

/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
  Value *Def = findBaseDefiningValueCached(I, Cache);
  auto Found = Cache.find(Def);
  if (Found != Cache.end()) {
    // Either a base-of relation, or a self reference. Caller must check.
    return Found->second;
  }
  // Only a BDV available
  return Def;
}

/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
/// is it known to be a base pointer? Or do we need to continue searching?
static bool isKnownBaseResult(Value *V) {
  if (!isa<PHINode>(V) && !isa<SelectInst>(V) &&
      !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
      !isa<ShuffleVectorInst>(V)) {
    // no recursion possible
    return true;
  }
  if (isa<Instruction>(V) &&
      cast<Instruction>(V)->getMetadata("is_base_value")) {
    // This is a previously inserted base phi or select. We know
    // that this is a base value.
    return true;
  }

  // We need to keep searching
  return false;
}

namespace {

/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
class BDVState {
public:
  enum Status { Unknown, Base, Conflict };

  BDVState() : BaseValue(nullptr) {}

  explicit BDVState(Status Status, Value *BaseValue = nullptr)
      : Status(Status), BaseValue(BaseValue) {
    assert(Status != Base || BaseValue);
  }

  explicit BDVState(Value *BaseValue) : Status(Base), BaseValue(BaseValue) {}

  Status getStatus() const { return Status; }
  Value *getBaseValue() const { return BaseValue; }

  bool isBase() const { return getStatus() == Base; }
  bool isUnknown() const { return getStatus() == Unknown; }
  bool isConflict() const { return getStatus() == Conflict; }

  bool operator==(const BDVState &Other) const {
    return BaseValue == Other.BaseValue && Status == Other.Status;
  }

  bool operator!=(const BDVState &other) const { return !(*this == other); }

  LLVM_DUMP_METHOD
  void dump() const {
    print(dbgs());
    dbgs() << '\n';
  }

  void print(raw_ostream &OS) const {
    switch (getStatus()) {
    case Unknown:
      OS << "U";
      break;
    case Base:
      OS << "B";
      break;
    case Conflict:
      OS << "C";
      break;
    }
    OS << " (" << getBaseValue() << " - "
       << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << "): ";
  }

private:
  Status Status = Unknown;
  AssertingVH<Value> BaseValue; // Non-null only if Status == Base.
};

} // end anonymous namespace

#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
#endif

static BDVState meetBDVStateImpl(const BDVState &LHS, const BDVState &RHS) {
  switch (LHS.getStatus()) {
  case BDVState::Unknown:
    return RHS;

  case BDVState::Base:
    assert(LHS.getBaseValue() && "can't be null");
    if (RHS.isUnknown())
      return LHS;

    if (RHS.isBase()) {
      if (LHS.getBaseValue() == RHS.getBaseValue()) {
        assert(LHS == RHS && "equality broken!");
        return LHS;
      }
      return BDVState(BDVState::Conflict);
    }
    assert(RHS.isConflict() && "only three states!");
    return BDVState(BDVState::Conflict);

  case BDVState::Conflict:
    return LHS;
  }
  llvm_unreachable("only three states!");
}

// Values of type BDVState form a lattice, and this function implements the
// meet operation.
static BDVState meetBDVState(const BDVState &LHS, const BDVState &RHS) {
  BDVState Result = meetBDVStateImpl(LHS, RHS);
  assert(Result == meetBDVStateImpl(RHS, LHS) &&
         "Math is wrong: meet does not commute!");
  return Result;
}

/// For a given value or instruction, figure out what base pointer it's
/// derived from. For gc objects, this is simply itself. On success, returns a
/// value which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
  Value *Def = findBaseOrBDV(I, Cache);

  if (isKnownBaseResult(Def))
    return Def;

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state. Use an
  //   optimistic algorithm to propagate base pointer information. Lattice
  //   looks like:
  //     UNKNOWN
  //     b1 b2 b3 b4
  //     CONFLICT
  //   When the algorithm terminates, all PHIs will either have a single
  //   concrete base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments. Add
  //   these to the base[Instruction] = BasePtr mapping. For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  //   argument.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm. This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.

#ifndef NDEBUG
  auto isExpectedBDVType = [](Value *BDV) {
    return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
           isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
           isa<ShuffleVectorInst>(BDV);
  };
#endif
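
  // As a (hypothetical) example of the propagation, consider
  //
  //   %p = phi i8* [ %a, %bb1 ], [ %q, %bb2 ]
  //   %q = phi i8* [ %b, %bb3 ], [ %p, %bb4 ]
  //
  // where %a and %b are distinct known bases. Both phis start in the Unknown
  // state; meeting over their inputs first moves %p towards Base(%a) and %q
  // towards Base(%b), and once each phi's state feeds into the other the
  // differing bases drive both to Conflict, so base phis are inserted for
  // both below.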

  // Once populated, will contain a mapping from each potentially non-base BDV
  // to a lattice value (described above) which corresponds to that BDV.
  // We use the order of insertion (DFS over the def/use graph) to provide a
  // stable deterministic ordering for visiting DenseMaps (which are unordered)
  // below. This is important for deterministic compilation.
  MapVector<Value *, BDVState> States;

  // Recursively fill in all base defining values reachable from the initial
  // one for which we don't already know a definite base value.
  /* scope */ {
    SmallVector<Value*, 16> Worklist;
    Worklist.push_back(Def);
    States.insert({Def, BDVState()});
    while (!Worklist.empty()) {
      Value *Current = Worklist.pop_back_val();
      assert(!isKnownBaseResult(Current) && "why did it get added?");

      auto visitIncomingValue = [&](Value *InVal) {
        Value *Base = findBaseOrBDV(InVal, Cache);
        if (isKnownBaseResult(Base))
          // Known bases won't need new instructions introduced and can be
          // ignored safely
          return;
        assert(isExpectedBDVType(Base) && "the only non-base values "
               "we see should be base defining values");
        if (States.insert(std::make_pair(Base, BDVState())).second)
          Worklist.push_back(Base);
      };
      if (PHINode *PN = dyn_cast<PHINode>(Current)) {
        for (Value *InVal : PN->incoming_values())
          visitIncomingValue(InVal);
      } else if (SelectInst *SI = dyn_cast<SelectInst>(Current)) {
        visitIncomingValue(SI->getTrueValue());
        visitIncomingValue(SI->getFalseValue());
      } else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
        visitIncomingValue(EE->getVectorOperand());
      } else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
        visitIncomingValue(IE->getOperand(0)); // vector operand
        visitIncomingValue(IE->getOperand(1)); // scalar operand
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(Current)) {
        visitIncomingValue(SV->getOperand(0));
        visitIncomingValue(SV->getOperand(1));
      } else {
        llvm_unreachable("Unimplemented instruction case");
      }
    }
  }

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "States after initialization:\n");
  for (auto Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Return a phi state for a base defining value. We'll generate a new
  // base state for known bases and expect to find a cached state otherwise.
  auto getStateForBDV = [&](Value *baseValue) {
    if (isKnownBaseResult(baseValue))
      return BDVState(baseValue);
    auto I = States.find(baseValue);
    assert(I != States.end() && "lookup failed!");
    return I->second;
  };

  bool Progress = true;
  while (Progress) {
#ifndef NDEBUG
    const size_t OldSize = States.size();
#endif
    Progress = false;
    // We're only changing values in this loop, thus safe to keep iterators.
    // Since this is computing a fixed point, the order of visit does not
    // affect the result. TODO: We could use a worklist here and make this run
    // much faster.
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      assert(!isKnownBaseResult(BDV) && "why did it get added?");

      // Given an input value for the current instruction, return a BDVState
      // instance which represents the BDV of that value.
      auto getStateForInput = [&](Value *V) mutable {
        Value *BDV = findBaseOrBDV(V, Cache);
        return getStateForBDV(BDV);
      };

      BDVState NewState;
      if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) {
        NewState = meetBDVState(NewState, getStateForInput(SI->getTrueValue()));
        NewState =
            meetBDVState(NewState, getStateForInput(SI->getFalseValue()));
      } else if (PHINode *PN = dyn_cast<PHINode>(BDV)) {
        for (Value *Val : PN->incoming_values())
          NewState = meetBDVState(NewState, getStateForInput(Val));
      } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
        // The 'meet' for an extractelement is slightly trivial, but it's still
        // useful in that it drives us to conflict if our input is.
        NewState =
            meetBDVState(NewState, getStateForInput(EE->getVectorOperand()));
      } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)) {
        // Given there's an inherent type mismatch between the operands, this
        // will *always* produce Conflict.
        NewState = meetBDVState(NewState, getStateForInput(IE->getOperand(0)));
        NewState = meetBDVState(NewState, getStateForInput(IE->getOperand(1)));
      } else {
        // The only instance this does not return a Conflict is when both the
        // vector operands are the same vector.
        auto *SV = cast<ShuffleVectorInst>(BDV);
        NewState = meetBDVState(NewState, getStateForInput(SV->getOperand(0)));
        NewState = meetBDVState(NewState, getStateForInput(SV->getOperand(1)));
      }

      BDVState OldState = States[BDV];
      if (OldState != NewState) {
        Progress = true;
        States[BDV] = NewState;
      }
    }

    assert(OldSize == States.size() &&
           "fixed point shouldn't be adding any new nodes to state");
  }

#ifndef NDEBUG
  LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
  for (auto Pair : States) {
    LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Insert Phis for all conflicts
  // TODO: adjust naming patterns to avoid this order of iteration dependency
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    assert(!isKnownBaseResult(I) && "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    // extractelement instructions are a bit special in that we may need to
    // insert an extract even when we know an exact base for the instruction.
    // The problem is that we need to convert from a vector base to a scalar
    // base for the particular index we're interested in.
    if (State.isBase() && isa<ExtractElementInst>(I) &&
        isa<VectorType>(State.getBaseValue()->getType())) {
      auto *EE = cast<ExtractElementInst>(I);
      // TODO: In many cases, the new instruction is just EE itself. We should
      // exploit this, but can't do it here since it would break the invariant
      // about the BDV not being known to be a base.
      auto *BaseInst = ExtractElementInst::Create(
          State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
      BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
      States[I] = BDVState(BDVState::Base, BaseInst);
    }

    // Since we're joining a vector and scalar base, they can never be the
    // same. As a result, we should always see insert element having reached
    // the conflict state.
    assert(!isa<InsertElementInst>(I) || State.isConflict());

    if (!State.isConflict())
      continue;

    /// Create and insert a new instruction which will represent the base of
    /// the given instruction 'I'.
    auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* {
      if (isa<PHINode>(I)) {
        BasicBlock *BB = I->getParent();
        int NumPreds = pred_size(BB);
        assert(NumPreds > 0 && "how did we reach here");
        std::string Name = suffixed_name_or(I, ".base", "base_phi");
        return PHINode::Create(I->getType(), NumPreds, Name, I);
      } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
        // The undef will be replaced later
        UndefValue *Undef = UndefValue::get(SI->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_select");
        return SelectInst::Create(SI->getCondition(), Undef, Undef, Name, SI);
      } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
        UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_ee");
        return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
                                          EE);
      } else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
        UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType());
        UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_ie");
        return InsertElementInst::Create(VecUndef, ScalarUndef,
                                         IE->getOperand(2), Name, IE);
      } else {
        auto *SV = cast<ShuffleVectorInst>(I);
        UndefValue *VecUndef = UndefValue::get(SV->getOperand(0)->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_sv");
        return new ShuffleVectorInst(VecUndef, VecUndef, SV->getOperand(2),
                                     Name, SV);
      }
    };
    Instruction *BaseInst = MakeBaseInstPlaceholder(I);
    // Add metadata marking this as a base value
    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
    States[I] = BDVState(BDVState::Conflict, BaseInst);
  }

  // Returns an instruction which produces the base pointer for a given
  // instruction. The instruction is assumed to be an input to one of the BDVs
  // seen in the inference algorithm above. As such, we must either already
  // know its base defining value is a base, or have inserted a new
  // instruction to propagate the base of its BDV and have entered that newly
  // introduced instruction into the state table. In either case, we are
  // assured to be able to determine an instruction which produces its base
  // pointer.
  auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
    Value *BDV = findBaseOrBDV(Input, Cache);
    Value *Base = nullptr;
    if (isKnownBaseResult(BDV)) {
      Base = BDV;
    } else {
      // Either conflict or base.
      assert(States.count(BDV));
      Base = States[BDV].getBaseValue();
    }
    assert(Base && "Can't be null");
    // The cast is needed since base traversal may strip away bitcasts
    if (Base->getType() != Input->getType() && InsertPt)
      Base = CastInst::CreatePointerBitCastOrAddrSpaceCast(Base, Input->getType(),
                                                           "cast", InsertPt);
    return Base;
  };
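
  // As an illustration (a hypothetical snippet, not from a real test case),
  // consider a loop that advances a derived pointer:
  //
  //   loop:
  //     %p = phi i8* [ %obj, %entry ], [ %p.next, %loop ]
  //     %p.next = getelementptr i8, i8* %p, i64 8
  //
  // %p reaches the Conflict state, so a placeholder
  //
  //   %p.base = phi i8* [ undef, %entry ], [ undef, %loop ]
  //
  // is created above (marked with !is_base_value), and the fixup loop below
  // fills in its incoming values as [ %obj, %entry ] and [ %p.base, %loop ],
  // yielding a base pointer that is live wherever %p is.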

  // Fixup all the inputs of the new PHIs. Visit order needs to be
  // deterministic and predictable because we're naming newly created
  // instructions.
  for (auto Pair : States) {
    Instruction *BDV = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;

    assert(!isKnownBaseResult(BDV) && "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!State.isConflict())
      continue;

    if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
      PHINode *PN = cast<PHINode>(BDV);
      unsigned NumPHIValues = PN->getNumIncomingValues();
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = PN->getIncomingValue(i);
        BasicBlock *InBB = PN->getIncomingBlock(i);

        // If we've already seen InBB, add the same incoming value
        // we added for it earlier. The IR verifier requires phi
        // nodes with multiple entries from the same basic block
        // to have the same incoming value for each of those
        // entries. If we don't do this check here and basephi
        // has a different type than base, we'll end up adding two
        // bitcasts (and hence two distinct values) as incoming
        // values for the same basic block.

        int BlockIndex = BasePHI->getBasicBlockIndex(InBB);
        if (BlockIndex != -1) {
          Value *OldBase = BasePHI->getIncomingValue(BlockIndex);
          BasePHI->addIncoming(OldBase, InBB);

#ifndef NDEBUG
          Value *Base = getBaseForInput(InVal, nullptr);
          // In essence this assert states: the only way two values
          // incoming from the same basic block may be different is by
          // being different bitcasts of the same value. A cleanup
          // that remains TODO is changing findBaseOrBDV to return an
          // llvm::Value of the correct type (and still remain pure).
          // This will remove the need to add bitcasts.
          assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
                 "Sanity -- findBaseOrBDV should be pure!");
#endif
          continue;
        }

        // Find the instruction which produces the base for each input. We may
        // need to insert a bitcast in the incoming block.
        // TODO: Need to split critical edges if insertion is needed
        Value *Base = getBaseForInput(InVal, InBB->getTerminator());
        BasePHI->addIncoming(Base, InBB);
      }
      assert(BasePHI->getNumIncomingValues() == NumPHIValues);
    } else if (SelectInst *BaseSI =
                   dyn_cast<SelectInst>(State.getBaseValue())) {
      SelectInst *SI = cast<SelectInst>(BDV);

      // Find the instruction which produces the base for each input.
      // We may need to insert a bitcast.
      BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
      BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
    } else if (auto *BaseEE =
                   dyn_cast<ExtractElementInst>(State.getBaseValue())) {
      Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
      // Find the instruction which produces the base for each input. We may
      // need to insert a bitcast.
      BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
    } else if (auto *BaseIE = dyn_cast<InsertElementInst>(State.getBaseValue())) {
      auto *BdvIE = cast<InsertElementInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvIE->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseIE);
        BaseIE->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // scalar operand
    } else {
      auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
      auto *BdvSV = cast<ShuffleVectorInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvSV->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseSV);
        BaseSV->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // vector operand
    }
  }

  // Cache all of our results so we can cheaply reuse them
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation! FIXME
  for (auto Pair : States) {
    auto *BDV = Pair.first;
    Value *Base = Pair.second.getBaseValue();
    assert(BDV && Base);
    assert(!isKnownBaseResult(BDV) && "why did it get added?");

    LLVM_DEBUG(
        dbgs() << "Updating base value cache"
               << " for: " << BDV->getName() << " from: "
               << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
               << " to: " << Base->getName() << "\n");

    if (Cache.count(BDV)) {
      assert(isKnownBaseResult(Base) &&
             "must be something we 'know' is a base pointer");
      // Once we transition from the BDV relation being stored in the Cache to
      // the base relation being stored, it must be stable
      assert((!isKnownBaseResult(Cache[BDV]) || Cache[BDV] == Base) &&
             "base relation should be stable");
    }
    Cache[BDV] = Base;
  }
  assert(Cache.count(Def));
  return Cache[Def];
}

// For a set of live pointers (base and/or derived), identify the base
// pointer of the object which they are derived from. This routine will
// mutate the IR graph as needed to make the 'base' pointer live at the
// definition site of 'derived'. This ensures that any use of 'derived' can
// also use 'base'. This may involve the insertion of a number of
// additional PHI nodes.
//
// preconditions: live is a set of pointer type Values
//
// side effects: may insert PHI nodes into the existing CFG, will preserve
// CFG, will not remove or mutate any existing nodes
//
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
static void
findBasePointers(const StatepointLiveSetTy &live,
                 MapVector<Value *, Value *> &PointerToBase,
                 DominatorTree *DT, DefiningValueMapTy &DVCache) {
  for (Value *ptr : live) {
    Value *base = findBasePointer(ptr, DVCache);
    assert(base && "failed to find base pointer");
    PointerToBase[ptr] = base;
    assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
            DT->dominates(cast<Instruction>(base)->getParent(),
                          cast<Instruction>(ptr)->getParent())) &&
           "The base we found better dominate the derived pointer");
  }
}
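
// For example (a hypothetical snippet), if the live set contains %obj and a
// derived pointer %field = getelementptr i8, i8* %obj, i64 16, the loop above
// produces PointerToBase = { %obj -> %obj, %field -> %obj }: base pointers map
// to themselves and derived pointers map to the object they were derived from.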

/// Find the required base pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
                             CallBase *Call,
                             PartiallyConstructedSafepointRecord &result) {
  MapVector<Value *, Value *> PointerToBase;
  findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);

  result.PointerToBase = PointerToBase;
}

// When inserting gc.relocate and gc.result calls, we need to ensure there are
// no uses of the original value / return value between the gc.statepoint and
// the gc.relocate / gc.result call. One case which can arise is a phi node at
// the start of one of the successor blocks. We also need to be able to insert
// the gc.relocates only on the path which goes through the statepoint. We
// might need to split an edge to make this possible.
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
                            DominatorTree &DT) {
  BasicBlock *Ret = BB;
  if (!BB->getUniquePredecessor())
    Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);

  // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes
  // from it
  FoldSingleEntryPHINodes(Ret);
  assert(!isa<PHINode>(Ret->begin()) &&
         "All PHI nodes should have been removed!");

  // At this point, we can safely insert a gc.relocate or gc.result as the
  // first instruction in Ret if needed.
  return Ret;
}

// List of all function attributes which must be stripped when lowering from
// the abstract machine model to the physical machine model. Essentially, these
// are all the effects a safepoint might have which we ignored in the abstract
// machine model for purposes of optimization. We have to strip these on
// both function declarations and call sites.
static constexpr Attribute::AttrKind FnAttrsToStrip[] =
    {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly,
     Attribute::ArgMemOnly, Attribute::InaccessibleMemOnly,
     Attribute::InaccessibleMemOrArgMemOnly,
     Attribute::NoSync, Attribute::NoFree};

// List of all parameter and return attributes which must be stripped when
// lowering from the abstract machine model. Note that we list attributes
// here which aren't valid as return attributes; that is okay. There are
// also some additional attributes with arguments which are handled
// explicitly and are not in this list.
static constexpr Attribute::AttrKind ParamAttrsToStrip[] =
    {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly,
     Attribute::NoAlias, Attribute::NoFree};

// Create a new attribute set containing only attributes which can be
// transferred from the original call to the safepoint.
static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
                                            AttributeList AL) {
  if (AL.isEmpty())
    return AL;

  // Remove the readonly, readnone, and statepoint function attributes.
  AttrBuilder FnAttrs(Ctx, AL.getFnAttrs());
  for (auto Attr : FnAttrsToStrip)
    FnAttrs.removeAttribute(Attr);

  for (Attribute A : AL.getFnAttrs()) {
    if (isStatepointDirectiveAttr(A))
      FnAttrs.removeAttribute(A);
  }

  // Just skip parameter and return attributes for now
  return AttributeList::get(Ctx, AttributeList::FunctionIndex,
                            AttributeSet::get(Ctx, FnAttrs));
}
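
// As an illustrative note: a callee marked readonly or noalias in the abstract
// machine model may still trigger a collection at the safepoint that wraps it,
// and the collector conceptually touches the entire heap (see the comment on
// stripNonValidData above). Keeping those attributes on the statepoint could
// let later consumers draw invalid conclusions, hence the conservative
// stripping here.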

namespace {

/// This class is used to defer RAUWs and `eraseFromParent`s. Using this
/// avoids having to worry about keeping around dangling pointers to Values.
class DeferredReplacement {
  AssertingVH<Instruction> Old;
  AssertingVH<Instruction> New;
  bool IsDeoptimize = false;

  DeferredReplacement() = default;

public:
  static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
    assert(Old != New && Old && New &&
           "Cannot RAUW equal values or to / from null!");

    DeferredReplacement D;
    D.Old = Old;
    D.New = New;
    return D;
  }

  static DeferredReplacement createDelete(Instruction *ToErase) {
    DeferredReplacement D;
    D.Old = ToErase;
    return D;
  }

  static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
#ifndef NDEBUG
    auto *F = cast<CallInst>(Old)->getCalledFunction();
    assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
           "Only way to construct a deoptimize deferred replacement");
#endif
    DeferredReplacement D;
    D.Old = Old;
    D.IsDeoptimize = true;
    return D;
  }

  /// Does the task represented by this instance.
  void doReplacement() {
    Instruction *OldI = Old;
    Instruction *NewI = New;

    assert(OldI != NewI && "Disallowed at construction?!");
    assert((!IsDeoptimize || !New) &&
           "Deoptimize intrinsics are not replaced!");

    Old = nullptr;
    New = nullptr;

    if (NewI)
      OldI->replaceAllUsesWith(NewI);

    if (IsDeoptimize) {
      // Note: we've inserted instructions, so the call to llvm.deoptimize may
      // not necessarily be followed by the matching return.
      auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
      new UnreachableInst(RI->getContext(), RI);
      RI->eraseFromParent();
    }

    OldI->eraseFromParent();
  }
};

} // end anonymous namespace

/// A unique function which doesn't require we sort the input vector.
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
  SmallSet<T, 8> Seen;
  Vec.erase(remove_if(Vec, [&](const T &V) { return !Seen.insert(V).second; }),
            Vec.end());
}

// Attach the stack map to the statepoint. The statepoint is an invoke
// with the given landing pad. The stack map (pointer) is attached as
// the type info of the landing pad.
static void
attachStackMap(uint64_t StatepointID, Instruction *LandingPad) {
  if (cast<LandingPadInst>(LandingPad)->isCleanup())
    return;
  Module *M = LandingPad->getModule();
  std::string Name = (Twine(GO_STACKMAP_SYM_PREFIX) + Twine(StatepointID)).str();
  Constant *C = M->getOrInsertGlobal(Name, Type::getInt64Ty(M->getContext()));
  LandingPad->setOperand(0, C);
}
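
// As an illustration (hypothetical values), given an FCA of type
// { i8*, i64, i8* }, extractPointerFromFCA below emits
//   %f0 = extractvalue { i8*, i64, i8* } %v, 0
//   %f2 = extractvalue { i8*, i64, i8* } %v, 2
// and appends %f0 and %f2 to PtrFields; the i64 field is skipped, and a
// pointer nested inside a further aggregate trips the assert instead.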
// Extract pointer fields from an FCA.
static void
extractPointerFromFCA(Value *V, IRBuilder<> &Builder,
                      SmallVectorImpl<Value *> &PtrFields) {
  Type *T = V->getType();
  assert(T->isStructTy());
  for (unsigned i = 0, e = T->getStructNumElements(); i < e; ++i) {
    Type *ElemT = T->getStructElementType(i);
    if (ElemT->isPointerTy()) {
      Value *Field = Builder.CreateExtractValue(V, {i});
      PtrFields.push_back(Field);
    } else
      assert(!hasPointer(ElemT) && "nested FCA is not supported");
  }
}

static Value *phiHasConstantValue(PHINode *Phi0);

static void
makeStatepointExplicitImpl(CallBase *Call, /* to replace */
                           SmallVectorImpl<Value *> &BasePtrs,
                           SmallVectorImpl<Value *> &LiveVariables,
                           PartiallyConstructedSafepointRecord &Result,
                           std::vector<DeferredReplacement> &Replacements) {
  assert(BasePtrs.size() == LiveVariables.size());

  // Then go ahead and use the builder to actually do the inserts. We insert
  // immediately before the previous instruction under the assumption that all
  // arguments will be available here. We can't insert afterwards since we may
  // be replacing a terminator.
  const DataLayout &DL = Call->getModule()->getDataLayout();
  IRBuilder<> Builder(Call);

  unique_unsorted(BasePtrs);

  // For aggregate typed stack slots, attach a bitmap identifying their
  // pointer fields.
  SmallVector<Value *, 64> PtrFields;
  for (Value *V : BasePtrs) {
    if (auto *Phi = dyn_cast<PHINode>(V))
      assert(!phiHasConstantValue(Phi) && "constant phi should not be in liveset");
    if (isa<AllocaInst>(V) ||
        (isa<Argument>(V) && cast<Argument>(V)->hasByValAttr())) {
      // A byval argument is at a fixed frame offset. Treat it the same as an
      // alloca.
      Type *T = cast<PointerType>(V->getType())->getElementType();
      if (hasPointer(T)) {
        PtrFields.push_back(V);
        getPtrBitmapForType(T, DL, PtrFields);
      }
    } else if (V->getType()->isStructTy()) {
      // Statepoint lowering doesn't handle FCAs. So we do it ourselves by
      // extracting all the pointer fields and letting the statepoint lowering
      // spill them.
      extractPointerFromFCA(V, Builder, PtrFields);
    } else
      PtrFields.push_back(V);
  }

  ArrayRef<Value *> GCArgs(PtrFields);
  uint64_t StatepointID = ID;
  ID++;
  uint32_t NumPatchBytes = 0;
  uint32_t Flags = uint32_t(StatepointFlags::None);

  ArrayRef<Use> CallArgs(Call->arg_begin(), Call->arg_end());
  ArrayRef<Use> TransitionArgs;
  if (auto TransitionBundle =
          Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
    Flags |= uint32_t(StatepointFlags::GCTransition);
    TransitionArgs = TransitionBundle->Inputs;
  }

  // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
  // with a return value, we lower them as never-returning calls to
  // __llvm_deoptimize that are followed by unreachable to get better codegen.
1464 bool IsDeoptimize = false; 1465 1466 StatepointDirectives SD = 1467 parseStatepointDirectivesFromAttrs(Call->getAttributes()); 1468 if (SD.NumPatchBytes) 1469 NumPatchBytes = *SD.NumPatchBytes; 1470 if (SD.StatepointID) 1471 StatepointID = *SD.StatepointID; 1472 1473 Value *CallTarget = Call->getCalledOperand(); 1474 if (Function *F = dyn_cast<Function>(CallTarget)) { 1475 if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) { 1476 // Calls to llvm.experimental.deoptimize are lowered to calls to the 1477 // __llvm_deoptimize symbol. We want to resolve this now, since the 1478 // verifier does not allow taking the address of an intrinsic function. 1479 1480 SmallVector<Type *, 8> DomainTy; 1481 for (Value *Arg : CallArgs) 1482 DomainTy.push_back(Arg->getType()); 1483 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy, 1484 /* isVarArg = */ false); 1485 1486 // Note: CallTarget can be a bitcast instruction of a symbol if there are 1487 // calls to @llvm.experimental.deoptimize with different argument types in 1488 // the same module. This is fine -- we assume the frontend knew what it 1489 // was doing when generating this kind of IR. 1490 CallTarget = F->getParent() 1491 ->getOrInsertFunction("__llvm_deoptimize", FTy).getCallee(); 1492 1493 IsDeoptimize = true; 1494 } 1495 } 1496 1497 // Create the statepoint given all the arguments 1498 GCStatepointInst *Token = nullptr; 1499 if (isa<CallInst>(Call)) { 1500 // We should have converted all statepoints to invoke. 1501 assert(false && "statepoint is not an invoke"); 1502 } else { 1503 InvokeInst *ToReplace = cast<InvokeInst>(Call); 1504 1505 // Insert the new invoke into the old block. We'll remove the old one in a 1506 // moment at which point this will become the new terminator for the 1507 // original block. 1508 1509 // Note (Go specific): 1510 // Here we attach GCArgs actually to the "deopt arg" slots, instead of 1511 // the "gc arg" slots, of the statepoint. Both are recorded in the stack 1512 // map the same way. The difference is that "deopt arg" doesn't need 1513 // relocation. We're implementing non-moving GC (for now). 1514 FunctionCallee FCallTarget(Call->getFunctionType(), 1515 Call->getCalledOperand()); 1516 InvokeInst *Invoke = Builder.CreateGCStatepointInvoke( 1517 StatepointID, NumPatchBytes, FCallTarget, ToReplace->getNormalDest(), 1518 ToReplace->getUnwindDest(), CallArgs, GCArgs, ArrayRef<Value*>(), 1519 "statepoint_token"); 1520 1521 Invoke->setCallingConv(ToReplace->getCallingConv()); 1522 1523 // Currently we will fail on parameter attributes and on certain 1524 // function attributes. In case if we can handle this set of attributes - 1525 // set up function attrs directly on statepoint and return attrs later for 1526 // gc_result intrinsic. 1527 Invoke->setAttributes( 1528 legalizeCallAttributes(Invoke->getContext(), ToReplace->getAttributes())); 1529 1530 Token = cast<GCStatepointInst>(Invoke); 1531 1532 // Generate gc relocates in exceptional path 1533 BasicBlock *UnwindBlock = ToReplace->getUnwindDest(); 1534 assert(!isa<PHINode>(UnwindBlock->begin()) && 1535 UnwindBlock->getUniquePredecessor() && 1536 "can't safely insert in this block!"); 1537 1538 Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt()); 1539 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); 1540 1541 // Attach exceptional gc relocates to the landingpad. 
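// (Go specific) No gc.relocate calls are actually emitted on the exceptional
// path; instead attachStackMap overwrites the landing pad's clause operand
// with the stack map symbol for this statepoint. Schematically, with the
// symbol name formed from GO_STACKMAP_SYM_PREFIX and the statepoint ID
// (the exact prefix is defined in GoStackMap.h; pointer types elided):
//
//   %lp = landingpad { i8*, i32 }
//           catch i8* null
// becomes
//   %lp = landingpad { i8*, i32 }
//           catch @<GO_STACKMAP_SYM_PREFIX><StatepointID>
//
// which later stack map emission can use to associate this call site with
// its live-pointer map.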
1542 Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst(); 1543 Result.UnwindToken = ExceptionalToken; 1544 1545 attachStackMap(StatepointID, ExceptionalToken); 1546 1547 BasicBlock *NormalDest = ToReplace->getNormalDest(); 1548 assert(!isa<PHINode>(NormalDest->begin()) && 1549 NormalDest->getUniquePredecessor() && 1550 "can't safely insert in this block!"); 1551 1552 Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt()); 1553 } 1554 assert(Token && "Should be set in one of the above branches!"); 1555 1556 if (IsDeoptimize) { 1557 // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we 1558 // transform the tail-call like structure to a call to a void function 1559 // followed by unreachable to get better codegen. 1560 Replacements.push_back( 1561 DeferredReplacement::createDeoptimizeReplacement(Call)); 1562 } else { 1563 Token->setName("statepoint_token"); 1564 if (!Call->getType()->isVoidTy() && !Call->use_empty()) { 1565 StringRef Name = 1566 Call->hasName() ? Call->getName() : ""; 1567 CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name); 1568 GCResult->setAttributes( 1569 AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex, 1570 Call->getAttributes().getRetAttrs())); 1571 1572 // We cannot RAUW or delete CS.getInstruction() because it could be in the 1573 // live set of some other safepoint, in which case that safepoint's 1574 // PartiallyConstructedSafepointRecord will hold a raw pointer to this 1575 // llvm::Instruction. Instead, we defer the replacement and deletion to 1576 // after the live sets have been made explicit in the IR, and we no longer 1577 // have raw pointers to worry about. 1578 Replacements.emplace_back( 1579 DeferredReplacement::createRAUW(Call, GCResult)); 1580 } else { 1581 Replacements.emplace_back( 1582 DeferredReplacement::createDelete(Call)); 1583 } 1584 } 1585 1586 Result.StatepointToken = Token; 1587 } 1588 1589 // Replace an existing gc.statepoint with a new one and a set of gc.relocates 1590 // which make the relocations happening at this safepoint explicit. 1591 // 1592 // WARNING: Does not do any fixup to adjust users of the original live 1593 // values. That's the callers responsibility. 1594 static void 1595 makeStatepointExplicit(DominatorTree &DT, CallBase *Call, 1596 PartiallyConstructedSafepointRecord &Result, 1597 std::vector<DeferredReplacement> &Replacements) { 1598 const auto &LiveSet = Result.LiveSet; 1599 const auto &PointerToBase = Result.PointerToBase; 1600 1601 // Convert to vector for efficient cross referencing. 1602 SmallVector<Value *, 64> BaseVec, LiveVec; 1603 LiveVec.reserve(LiveSet.size()); 1604 BaseVec.reserve(LiveSet.size()); 1605 for (Value *L : LiveSet) { 1606 LiveVec.push_back(L); 1607 assert(PointerToBase.count(L)); 1608 Value *Base = PointerToBase.find(L)->second; 1609 BaseVec.push_back(Base); 1610 } 1611 assert(LiveVec.size() == BaseVec.size()); 1612 1613 // Do the actual rewriting and delete the old statepoint 1614 makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements); 1615 } 1616 1617 static void findLiveReferences( 1618 Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate, 1619 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records, 1620 SetVector<Value *> &AddrTakenAllocas, 1621 SetVector<Value *> &ToZero, 1622 SetVector<Value *> &BadLoads, 1623 DefiningValueMapTy &DVCache) { 1624 GCPtrLivenessData OriginalLivenessData; 1625 1626 // Find all allocas. 
1627 SetVector<Value *> AllAllocas; 1628 if (ClobberNonLive) 1629 for (Instruction &I : F.getEntryBlock()) 1630 if (isa<AllocaInst>(I) && hasPointer(I.getType()->getPointerElementType())) 1631 AllAllocas.insert(&I); 1632 1633 computeLiveInValues(DT, F, OriginalLivenessData, AddrTakenAllocas, 1634 ToZero, BadLoads, DVCache); 1635 for (size_t i = 0; i < records.size(); i++) { 1636 struct PartiallyConstructedSafepointRecord &info = records[i]; 1637 analyzeParsePointLiveness(DT, OriginalLivenessData, AddrTakenAllocas, 1638 toUpdate[i], info, AllAllocas, DVCache); 1639 } 1640 } 1641 1642 // A helper function that reports whether V is a Phi that contains an 1643 // ambiguously live alloca as input. 1644 static bool 1645 phiContainsAlloca(Value *V, SetVector<Value *> &ToZero, 1646 SetVector<Value *> &AddrTakenAllocas) { 1647 PHINode *Phi0 = dyn_cast<PHINode>(V); 1648 if (!Phi0) 1649 return false; 1650 1651 // Visit all the Phi inputs. Discover new Phis on the go, and visit them. 1652 SmallSet<PHINode *, 4> Phis, Visited; 1653 Phis.insert(Phi0); 1654 while (!Phis.empty()) { 1655 PHINode *Phi = *Phis.begin(); 1656 Visited.insert(Phi); 1657 for (Value *Operand : Phi->incoming_values()) { 1658 if (PHINode *P = dyn_cast<PHINode>(Operand)) { 1659 if (!Visited.count(P)) 1660 Phis.insert(P); 1661 continue; 1662 } 1663 Value *Base = Operand->stripPointerCasts(); 1664 if (ToZero.count(Base) != 0 && AddrTakenAllocas.count(Base) != 0) 1665 return true; 1666 } 1667 Phis.erase(Phi); 1668 } 1669 return false; 1670 } 1671 1672 // Zero ambigously lived stack slots. We insert zeroing at lifetime 1673 // start (or the entry block), so the GC won't see uninitialized 1674 // content. We also insert zeroing at kill sites, to ensure the GC 1675 // won't see a dead slot come back to life. 1676 // We also conservatively extend the lifetime of address-taken slots, 1677 // to prevent the slot being reused while it is still recorded live. 1678 static void 1679 zeroAmbiguouslyLiveSlots(Function &F, SetVector<Value *> &ToZero, 1680 SetVector<Value *> &AddrTakenAllocas) { 1681 SmallVector<Instruction *, 16> InstToDelete; 1682 SetVector<Value *> Done; 1683 const DataLayout &DL = F.getParent()->getDataLayout(); 1684 IntegerType *Int64Ty = Type::getInt64Ty(F.getParent()->getContext()); 1685 IntegerType *Int8Ty = Type::getInt8Ty(F.getParent()->getContext()); 1686 1687 // If a slot has lifetime.start, place the zeroing right after it. 1688 for (Instruction &I : instructions(F)) { 1689 if (CallInst *CI = dyn_cast<CallInst>(&I)) 1690 if (Function *Fn = CI->getCalledFunction()) { 1691 if (Fn->getIntrinsicID() == Intrinsic::lifetime_start) { 1692 Value *V = I.getOperand(1)->stripPointerCasts(); 1693 if ((ToZero.count(V) != 0 && AddrTakenAllocas.count(V) != 0) || 1694 phiContainsAlloca(V, ToZero, AddrTakenAllocas)) { 1695 // For addrtaken alloca, the lifetime start may not dominate all 1696 // safepoints where the slot can be live. 1697 // For now, zero it in the entry block and remove the lifetime 1698 // marker. 1699 // Also remove lifetime markers on Phis that contain interesting 1700 // allocas (which must be address-taken). 1701 InstToDelete.push_back(&I); 1702 } else if (ToZero.count(V) != 0) { 1703 // Non-addrtaken alloca. Just insert zeroing, keep the lifetime marker. 
1704 IRBuilder<> Builder(I.getNextNode()); 1705 Value *Zero = Constant::getNullValue(V->getType()->getPointerElementType()); 1706 Builder.CreateStore(Zero, V); 1707 // Don't remove V from ToZero for now, as there may be multiple 1708 // lifetime start markers, where we need to insert zeroing. 1709 Done.insert(V); 1710 } 1711 } else if (Fn->getIntrinsicID() == Intrinsic::lifetime_end) { 1712 Value *V = I.getOperand(1)->stripPointerCasts(); 1713 if ((ToZero.count(V) != 0 && AddrTakenAllocas.count(V) != 0) || 1714 phiContainsAlloca(V, ToZero, AddrTakenAllocas)) { 1715 if (!succ_empty(I.getParent())) { // no need to zero at exit block 1716 // What to zero in the Phi case? 1717 // We just zero whatever the Phi points to, using the size on the 1718 // lifetime marker. This also works in the alloca case. 1719 IRBuilder<> Builder(&I); 1720 Value *Zero = Constant::getNullValue(Int8Ty); 1721 Value *Siz = I.getOperand(0); 1722 Builder.CreateMemSet(V, Zero, Siz, MaybeAlign(0)); 1723 } 1724 InstToDelete.push_back(&I); 1725 } 1726 } 1727 } 1728 } 1729 1730 for (Instruction *I : InstToDelete) 1731 I->eraseFromParent(); 1732 ToZero.set_subtract(Done); 1733 if (ToZero.empty()) 1734 return; 1735 1736 // Otherwise, place the zeroing in the entry block after the alloca. 1737 for (Instruction &I : F.getEntryBlock()) 1738 if (ToZero.count(&I) != 0) { 1739 IRBuilder<> Builder(I.getNextNode()); 1740 Type *ElemTyp = I.getType()->getPointerElementType(); 1741 if (AddrTakenAllocas.count(&I) != 0) { 1742 // For addrtaken alloca, we removed the lifetime marker above. 1743 // Insert a new one at the entry block. 1744 unsigned Size = DL.getTypeStoreSize(ElemTyp); 1745 Builder.CreateLifetimeStart(&I, ConstantInt::get(Int64Ty, Size)); 1746 } 1747 Value *Zero = Constant::getNullValue(ElemTyp); 1748 Builder.CreateStore(Zero, &I); 1749 ToZero.remove(&I); 1750 } 1751 1752 assert(ToZero.empty()); 1753 } 1754 1755 // Detect degenerate Phi. 1756 // Try harder to handle mutually dependent case, like 1757 // a = phi(null, b) 1758 // (in a different block) 1759 // b = phi(a, null) 1760 static Value * 1761 phiHasConstantValue(PHINode *Phi0) { 1762 Value *V = Phi0->hasConstantValue(); 1763 if (V && isa<Constant>(V)) 1764 return V; 1765 1766 V = nullptr; 1767 1768 // Visit all the Phi inputs. Discover new Phis on the go, and visit them. 1769 // Early exit if we see a non-constant or two different constants. 1770 SmallSet<PHINode *, 4> Phis, Visited; 1771 Phis.insert(Phi0); 1772 while (!Phis.empty()) { 1773 PHINode *Phi = *Phis.begin(); 1774 Visited.insert(Phi); 1775 for (Value *Operand : Phi->incoming_values()) { 1776 if (PHINode *P = dyn_cast<PHINode>(Operand)) { 1777 if (!Visited.count(P)) 1778 Phis.insert(P); 1779 continue; 1780 } 1781 if (isa<Constant>(Operand)) { 1782 if (V && V != Operand) 1783 return nullptr; // operands not same 1784 V = Operand; 1785 } else 1786 return nullptr; // has non-constant input 1787 } 1788 Phis.erase(Phi); 1789 } 1790 1791 return V; 1792 } 1793 1794 static void 1795 fixBadPhiOperands(Function &F, SetVector<Value *> &BadLoads) { 1796 // Don't delete instructions yet -- they may be referenced in the liveness 1797 // map. We'll delete them at the end. 1798 1799 for (auto *I : BadLoads) 1800 I->replaceAllUsesWith(Constant::getNullValue(I->getType())); 1801 1802 // The replacement above may lead to degenerate Phis, which, if live, 1803 // will be encoded as constants in the stack map, which is bad 1804 // (confusing with pointer bitmaps). Clean them up. 
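// For example (illustrative; %L and %p are hypothetical): after a bad load %L
// has been replaced with null above, a phi such as
//
//   %p = phi i8* [ %L, %bb1 ], [ null, %bb2 ]
//
// now has only null inputs and folds away; retiring %p can in turn make a phi
// that uses %p degenerate, which is why the loop below repeats until nothing
// changes.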
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (Instruction &I : instructions(F))
      if (auto *Phi = dyn_cast<PHINode>(&I))
        if (!BadLoads.count(Phi))
          if (Value *V = phiHasConstantValue(Phi)) {
            Phi->replaceAllUsesWith(V);
            BadLoads.insert(Phi);
            Changed = true;
          }
  }
}

static void fixStackWriteBarriers(Function &F, DefiningValueMapTy &DVCache);

static bool insertParsePoints(Function &F, DominatorTree &DT,
                              TargetTransformInfo &TTI,
                              SmallVectorImpl<CallBase *> &ToUpdate) {
#ifndef NDEBUG
  // Sanity check the input.
  std::set<CallBase *> Uniqued;
  Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
  assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");

  for (CallBase *Call : ToUpdate)
    assert(Call->getFunction() == &F);
#endif

  // When inserting gc.relocates for invokes, we need to be able to insert at
  // the top of the successor blocks. See the comment on
  // normalizeForInvokeSafepoint for exactly what is needed. Note that this
  // step may restructure the CFG.
  for (CallBase *Call : ToUpdate) {
    auto *II = dyn_cast<InvokeInst>(Call);
    if (!II)
      continue;
    normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
    normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
  }

  SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());

  SetVector<Value *> AddrTakenAllocas, ToZero;

  // In some rare cases, the optimizer may generate a load from an
  // uninitialized slot. I have seen this happen with LICM's load promotion:
  // a load is moved out of the loop, and it is only used in Phis. In the
  // actual execution, whenever the value is used, the Phi holds one of the
  // other incoming values, which are valid. The problem is that the load or
  // the Phi may be recorded live, and the stack scan may happen before the
  // slot holds valid data, which is bad. We record those bad loads and
  // remove them from the live sets.
  SetVector<Value *> BadLoads;

  // Cache the 'defining value' relation used in the computation and
  // insertion of base phis and selects. This ensures that we don't insert
  // large numbers of duplicate base_phis.
  DefiningValueMapTy DVCache;

  fixStackWriteBarriers(F, DVCache);

  // A) Identify all gc pointers which are statically live at the given call
  // site.
  findLiveReferences(F, DT, ToUpdate, Records, AddrTakenAllocas, ToZero,
                     BadLoads, DVCache);

  // B) Find the base pointers for each live pointer.
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &info = Records[i];
    findBasePointers(DT, DVCache, ToUpdate[i], info);
  }

  fixBadPhiOperands(F, BadLoads);

  // It is possible that non-constant live variables have a constant base. For
  // example, a GEP with a variable offset from a global. In this case we can
  // remove it from the liveset. We already don't add constants to the liveset
  // because we assume they won't move at runtime and the GC doesn't need to be
  // informed about them. The same reasoning applies if the base is constant.
  // Note that the relocation placement code relies on this filtering for
  // correctness as it expects the base to be in the liveset, which isn't true
  // if the base is constant.
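  // For example (illustrative): %p = getelementptr i8, i8* @g, i64 %i is not
  // itself a constant, but its base resolves to the constant @g, so %p is
  // dropped from the live set below.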
  // Also make sure bad loads are not included in the liveset.
  for (auto &Info : Records)
    for (auto &BasePair : Info.PointerToBase)
      if (isa<Constant>(BasePair.second) || BadLoads.count(BasePair.second))
        Info.LiveSet.remove(BasePair.first);

  // We need this to safely RAUW and delete call or invoke return values that
  // may themselves be live over a statepoint. For details, please see usage in
  // makeStatepointExplicitImpl.
  std::vector<DeferredReplacement> Replacements;

  // Now run through and replace the existing statepoints with new ones with
  // the live variables listed. We do not yet update uses of the values being
  // relocated. We have references to live variables that need to
  // survive to the last iteration of this loop. (By construction, the
  // previous statepoint can not be a live variable, thus we can remove
  // the old statepoint calls as we go.)
  for (size_t i = 0; i < Records.size(); i++)
    makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);

  ToUpdate.clear(); // prevent accidental use of the invalid calls

  for (auto &PR : Replacements)
    PR.doReplacement();

  Replacements.clear();

  // At this point we should be able to delete all the bad loads and Phis.
  // There should be no references to them. If there are, it will assert,
  // since the liveness args are already linked into the IR.
  for (Value *V : BadLoads)
    cast<Instruction>(V)->eraseFromParent();

  for (auto &Info : Records) {
    // These live sets may contain stale Value pointers, since we replaced
    // calls with operand bundles with calls wrapped in gc.statepoint, and
    // some of those calls may have been def'ing live gc pointers. Clear
    // these out to avoid accidentally using them.
    //
    // TODO: We should create a separate data structure that does not contain
    // these live sets, and migrate to using that data structure from this
    // point onward.
    Info.LiveSet.clear();
    Info.PointerToBase.clear();
  }

  // Do all the fixups of the original live variables to their relocated selves.
  SmallVector<Value *, 128> Live;
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    // We can't simply save the live set from the original insertion. One of
    // the live values might be the result of a call which needs a safepoint.
    // That Value* no longer exists and we need to use the new gc_result.
    // Thankfully, the live set is embedded in the statepoint (and updated), so
    // we just grab that.
    Live.insert(Live.end(), Info.StatepointToken->gc_args_begin(),
                Info.StatepointToken->gc_args_end());
#ifndef NDEBUG
    // Do some basic sanity checks on our liveness results before performing
    // relocation. Relocation can and will turn mistakes in liveness results
    // into nonsensical code which is much harder to debug.
1951 // TODO: It would be nice to test consistency as well 1952 assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) && 1953 "statepoint must be reachable or liveness is meaningless"); 1954 for (Value *V : Info.StatepointToken->gc_args()) { 1955 if (!isa<Instruction>(V)) 1956 // Non-instruction values trivial dominate all possible uses 1957 continue; 1958 auto *LiveInst = cast<Instruction>(V); 1959 assert(DT.isReachableFromEntry(LiveInst->getParent()) && 1960 "unreachable values should never be live"); 1961 assert(DT.dominates(LiveInst, Info.StatepointToken) && 1962 "basic SSA liveness expectation violated by liveness analysis"); 1963 } 1964 #endif 1965 } 1966 1967 #ifndef NDEBUG 1968 // sanity check 1969 for (auto *Ptr : Live) 1970 assert(isHandledGCPointerType(Ptr->getType()) && 1971 "must be a gc pointer type"); 1972 #endif 1973 1974 zeroAmbiguouslyLiveSlots(F, ToZero, AddrTakenAllocas); 1975 1976 // In clobber-non-live mode, delete all lifetime markers, as the 1977 // inserted clobbering may be beyond the original lifetime. 1978 if (ClobberNonLive) { 1979 SetVector<Instruction *> ToDel; 1980 1981 for (Instruction &I : instructions(F)) 1982 if (CallInst *CI = dyn_cast<CallInst>(&I)) 1983 if (Function *Fn = CI->getCalledFunction()) 1984 if (Fn->getIntrinsicID() == Intrinsic::lifetime_start || 1985 Fn->getIntrinsicID() == Intrinsic::lifetime_end) 1986 ToDel.insert(&I); 1987 1988 for (Instruction *I : ToDel) 1989 I->eraseFromParent(); 1990 } 1991 1992 return !Records.empty(); 1993 } 1994 1995 // Handles both return values and arguments for Functions and calls. 1996 template <typename AttrHolder> 1997 static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, 1998 unsigned Index) { 1999 AttrBuilder R(Ctx); 2000 AttributeSet AS = AH.getAttributes().getAttributes(Index); 2001 if (AS.getDereferenceableBytes()) 2002 R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable, 2003 AS.getDereferenceableBytes())); 2004 if (AS.getDereferenceableOrNullBytes()) 2005 R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull, 2006 AS.getDereferenceableOrNullBytes())); 2007 for (auto Attr : ParamAttrsToStrip) 2008 if (AS.hasAttribute(Attr)) 2009 R.addAttribute(Attr); 2010 2011 AttributeSet AS2 = AttributeSet::get(Ctx,R); 2012 if (AS2.getNumAttributes() > 0) 2013 AH.setAttributes(AH.getAttributes().removeAttributesAtIndex(Ctx, Index, AttributeMask(AS2))); 2014 } 2015 2016 static void stripNonValidAttributesFromPrototype(Function &F) { 2017 LLVMContext &Ctx = F.getContext(); 2018 2019 for (Argument &A : F.args()) 2020 if (isa<PointerType>(A.getType())) 2021 RemoveNonValidAttrAtIndex(Ctx, F, 2022 A.getArgNo() + AttributeList::FirstArgIndex); 2023 2024 if (isa<PointerType>(F.getReturnType())) 2025 RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); 2026 2027 for (auto Attr : FnAttrsToStrip) 2028 F.removeFnAttr(Attr); 2029 } 2030 2031 /// Certain metadata on instructions are invalid after running this pass. 2032 /// Optimizations that run after this can incorrectly use this metadata to 2033 /// optimize functions. We drop such metadata on the instruction. 2034 static void stripInvalidMetadataFromInstruction(Instruction &I) { 2035 if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) 2036 return; 2037 // These are the attributes that are still valid on loads and stores after 2038 // this pass. 2039 // The metadata implying dereferenceability and noalias are (conservatively) 2040 // dropped. 
This is because semantically, after this pass runs, 2041 // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can 2042 // touch the entire heap including noalias objects. Note: The reasoning is 2043 // same as stripping the dereferenceability and noalias attributes that are 2044 // analogous to the metadata counterparts. 2045 // We also drop the invariant.load metadata on the load because that metadata 2046 // implies the address operand to the load points to memory that is never 2047 // changed once it became dereferenceable. This is no longer true after this 2048 // pass. Similar reasoning applies to invariant.group metadata, which applies 2049 // to loads within a group. 2050 unsigned ValidMetadata[] = {LLVMContext::MD_tbaa, 2051 LLVMContext::MD_range, 2052 LLVMContext::MD_alias_scope, 2053 LLVMContext::MD_nontemporal, 2054 LLVMContext::MD_nonnull, 2055 LLVMContext::MD_align, 2056 LLVMContext::MD_type}; 2057 2058 // Drops all metadata on the instruction other than ValidMetadata. 2059 I.dropUnknownNonDebugMetadata(ValidMetadata); 2060 } 2061 2062 static void stripNonValidDataFromBody(Function &F) { 2063 if (F.empty()) 2064 return; 2065 2066 LLVMContext &Ctx = F.getContext(); 2067 MDBuilder Builder(Ctx); 2068 2069 // Set of invariantstart instructions that we need to remove. 2070 // Use this to avoid invalidating the instruction iterator. 2071 SmallVector<IntrinsicInst*, 12> InvariantStartInstructions; 2072 2073 for (Instruction &I : instructions(F)) { 2074 // invariant.start on memory location implies that the referenced memory 2075 // location is constant and unchanging. This is no longer true after 2076 // this pass runs because there can be calls to gc.statepoint 2077 // which frees the entire heap and the presence of invariant.start allows 2078 // the optimizer to sink the load of a memory location past a statepoint, 2079 // which is incorrect. 2080 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 2081 if (II->getIntrinsicID() == Intrinsic::invariant_start) { 2082 InvariantStartInstructions.push_back(II); 2083 continue; 2084 } 2085 2086 if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) { 2087 MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag); 2088 I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA); 2089 } 2090 2091 stripInvalidMetadataFromInstruction(I); 2092 2093 if (auto *Call = dyn_cast<CallBase>(&I)) { 2094 for (int i = 0, e = Call->arg_size(); i != e; i++) 2095 if (isa<PointerType>(Call->getArgOperand(i)->getType())) 2096 RemoveNonValidAttrAtIndex(Ctx, *Call, i + AttributeList::FirstArgIndex); 2097 if (isa<PointerType>(Call->getType())) 2098 RemoveNonValidAttrAtIndex(Ctx, *Call, AttributeList::ReturnIndex); 2099 } 2100 } 2101 2102 // Delete the invariant.start instructions and RAUW undef. 2103 for (auto *II : InvariantStartInstructions) { 2104 II->replaceAllUsesWith(UndefValue::get(II->getType())); 2105 II->eraseFromParent(); 2106 } 2107 } 2108 2109 /// Returns true if this function should be rewritten by this pass. 
2110 static bool shouldRewriteStatepointsIn(Function &F) { 2111 return F.hasGC(); 2112 } 2113 2114 static void stripNonValidData(Module &M) { 2115 #ifndef NDEBUG 2116 assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!"); 2117 #endif 2118 2119 for (Function &F : M) 2120 stripNonValidAttributesFromPrototype(F); 2121 2122 for (Function &F : M) 2123 stripNonValidDataFromBody(F); 2124 } 2125 2126 bool GoStatepoints::runOnFunction(Function &F, DominatorTree &DT, 2127 TargetTransformInfo &TTI, 2128 const TargetLibraryInfo &TLI) { 2129 assert(!F.isDeclaration() && !F.empty() && 2130 "need function body to rewrite statepoints in"); 2131 assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision"); 2132 2133 if (!PrintFunc.empty()) 2134 PrintLiveSet = F.getName() == PrintFunc; 2135 if (PrintLiveSet) 2136 dbgs() << "\n********** Liveness of function " << F.getName() << " **********\n"; 2137 2138 auto NeedsRewrite = [&TLI](Instruction &I) { 2139 if (const auto *Call = dyn_cast<CallBase>(&I)) 2140 return !callsGCLeafFunction(Call, TLI) && !isa<GCStatepointInst>(Call); 2141 return false; 2142 }; 2143 2144 // Delete any unreachable statepoints so that we don't have unrewritten 2145 // statepoints surviving this pass. This makes testing easier and the 2146 // resulting IR less confusing to human readers. 2147 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); 2148 bool MadeChange = removeUnreachableBlocks(F, &DTU); 2149 2150 // Rewrite all the calls that need statepoints to invokes, so we can 2151 // attach a stack map through its landing pad. 2152 SmallVector<CallInst *, 64> Calls; 2153 for (Instruction &I : instructions(F)) 2154 if (NeedsRewrite(I) && isa<CallInst>(I)) 2155 Calls.push_back(cast<CallInst>(&I)); 2156 2157 if (!Calls.empty()) { 2158 MadeChange = true; 2159 2160 for (CallInst *CI : Calls) { 2161 // Create a dummy landing pad block. 2162 LLVMContext &C = F.getContext(); 2163 BasicBlock *PadBB = BasicBlock::Create(C, "dummy", &F); 2164 Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); 2165 2166 LandingPadInst *LPad = 2167 LandingPadInst::Create(ExnTy, 1, "dummy.ex", PadBB); 2168 LPad->addClause(Constant::getNullValue(Type::getInt8PtrTy(C))); 2169 new UnreachableInst(PadBB->getContext(), PadBB); 2170 2171 BasicBlock *Old = CI->getParent(); 2172 BasicBlock *New = changeToInvokeAndSplitBasicBlock(CI, PadBB); 2173 2174 // Old dominates New. New node dominates all other nodes dominated by Old. 2175 DomTreeNode *OldNode = DT.getNode(Old); 2176 std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end()); 2177 2178 DomTreeNode *NewNode = DT.addNewBlock(New, Old); 2179 for (DomTreeNode *I : Children) 2180 DT.changeImmediateDominator(I, NewNode); 2181 2182 DTU.insertEdge(Old, PadBB); 2183 } 2184 } 2185 2186 // We should not run removeUnreachableBlocks at this point, as it 2187 // will remove the dummy landing pads. 2188 2189 // Flush the Dominator Tree. 2190 DTU.getDomTree(); 2191 2192 // Gather all the statepoints which need rewritten. Be careful to only 2193 // consider those in reachable code since we need to ask dominance queries 2194 // when rewriting. We'll delete the unreachable ones in a moment. 2195 SmallVector<CallBase *, 64> ParsePointNeeded; 2196 for (Instruction &I : instructions(F)) { 2197 // TODO: only the ones with the flag set! 2198 if (NeedsRewrite(I)) { 2199 // NOTE removeUnreachableBlocks() is stronger than 2200 // DominatorTree::isReachableFromEntry(). 
In other words 2201 // removeUnreachableBlocks can remove some blocks for which 2202 // isReachableFromEntry() returns true. 2203 assert(DT.isReachableFromEntry(I.getParent()) && 2204 "no unreachable blocks expected"); 2205 ParsePointNeeded.push_back(cast<CallBase>(&I)); 2206 } 2207 } 2208 2209 // Return early if no work to do. 2210 if (ParsePointNeeded.empty()) 2211 return MadeChange; 2212 2213 // As a prepass, go ahead and aggressively destroy single entry phi nodes. 2214 // These are created by LCSSA. They have the effect of increasing the size 2215 // of liveness sets for no good reason. It may be harder to do this post 2216 // insertion since relocations and base phis can confuse things. 2217 for (BasicBlock &BB : F) 2218 if (BB.getUniquePredecessor()) { 2219 MadeChange = true; 2220 FoldSingleEntryPHINodes(&BB); 2221 } 2222 2223 // Before we start introducing relocations, we want to tweak the IR a bit to 2224 // avoid unfortunate code generation effects. The main example is that we 2225 // want to try to make sure the comparison feeding a branch is after any 2226 // safepoints. Otherwise, we end up with a comparison of pre-relocation 2227 // values feeding a branch after relocation. This is semantically correct, 2228 // but results in extra register pressure since both the pre-relocation and 2229 // post-relocation copies must be available in registers. For code without 2230 // relocations this is handled elsewhere, but teaching the scheduler to 2231 // reverse the transform we're about to do would be slightly complex. 2232 // Note: This may extend the live range of the inputs to the icmp and thus 2233 // increase the liveset of any statepoint we move over. This is profitable 2234 // as long as all statepoints are in rare blocks. If we had in-register 2235 // lowering for live values this would be a much safer transform. 2236 auto getConditionInst = [](Instruction *TI) -> Instruction* { 2237 if (auto *BI = dyn_cast<BranchInst>(TI)) 2238 if (BI->isConditional()) 2239 return dyn_cast<Instruction>(BI->getCondition()); 2240 // TODO: Extend this to handle switches 2241 return nullptr; 2242 }; 2243 for (BasicBlock &BB : F) { 2244 Instruction *TI = BB.getTerminator(); 2245 if (auto *Cond = getConditionInst(TI)) 2246 // TODO: Handle more than just ICmps here. We should be able to move 2247 // most instructions without side effects or memory access. 2248 if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) { 2249 MadeChange = true; 2250 Cond->moveBefore(TI); 2251 } 2252 } 2253 2254 MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded); 2255 return MadeChange; 2256 } 2257 2258 // liveness computation via standard dataflow 2259 // ------------------------------------------------------------------- 2260 2261 // TODO: Consider using bitvectors for liveness, the set of potentially 2262 // interesting values should be small and easy to pre-compute. 2263 2264 static Value* 2265 isAlloca(Value *V, DefiningValueMapTy &DVCache) { 2266 Value *Base = findBaseOrBDV(V, DVCache); 2267 return isa<AllocaInst>(Base) ? 
Base : nullptr; 2268 } 2269 2270 static Value* 2271 isTrackedAlloca(Value *V, DefiningValueMapTy &DVCache) { 2272 Value *Base = isAlloca(V, DVCache); 2273 if (Base && 2274 hasPointer(Base->getType()->getPointerElementType())) 2275 return Base; 2276 return nullptr; 2277 } 2278 2279 static bool 2280 hasStructRetAttr(CallBase *Call) { 2281 return Call->hasStructRetAttr() || 2282 (Call->getNumOperands() > 0 && 2283 Call->getParamAttr(0, "go_sret") != Attribute()); 2284 } 2285 2286 /// Compute the live-in set for the location rbegin starting from 2287 /// the live-out set of the basic block 2288 static void computeLiveInValues(BasicBlock::reverse_iterator Begin, 2289 BasicBlock::reverse_iterator End, 2290 SetVector<Value *> &LiveTmp, 2291 SetVector<Value *> &AddrTakenAllocas, 2292 DefiningValueMapTy &DVCache) { 2293 for (auto &I : make_range(Begin, End)) { 2294 // KILL/Def - Remove this definition from LiveIn 2295 LiveTmp.remove(&I); 2296 2297 // Don't consider *uses* in PHI nodes, we handle their contribution to 2298 // predecessor blocks when we seed the LiveOut sets 2299 if (isa<PHINode>(I)) 2300 continue; 2301 2302 // USE - Add to the LiveIn set for this instruction 2303 for (Value *V : I.operands()) { 2304 assert(!isUnhandledGCPointerType(V->getType()) && 2305 "unexpected value type"); 2306 if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) { 2307 // The choice to exclude all things constant here is slightly subtle. 2308 // There are two independent reasons: 2309 // - We assume that things which are constant (from LLVM's definition) 2310 // do not move at runtime. For example, the address of a global 2311 // variable is fixed, even though it's contents may not be. 2312 // - Second, we can't disallow arbitrary inttoptr constants even 2313 // if the language frontend does. Optimization passes are free to 2314 // locally exploit facts without respect to global reachability. This 2315 // can create sections of code which are dynamically unreachable and 2316 // contain just about anything. (see constants.ll in tests) 2317 2318 if (isAlloca(V, DVCache)) { 2319 Value *Base = isTrackedAlloca(V, DVCache); 2320 if (!Base || AddrTakenAllocas.count(Base)) 2321 continue; 2322 2323 // For non-address-taken alloca, record its use. 2324 if (isa<DbgInfoIntrinsic>(I) || isa<BitCastInst>(I) || 2325 isa<GetElementPtrInst>(I) || isa<ICmpInst>(I) || 2326 isa<AddrSpaceCastInst>(I)) 2327 // Not real use. 2328 continue; 2329 if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<InvokeInst>(I)) { 2330 LiveTmp.insert(Base); 2331 continue; 2332 } 2333 2334 if (CallInst *CI = dyn_cast<CallInst>(&I)) { 2335 if (Function *Fn = CI->getCalledFunction()) 2336 switch (Fn->getIntrinsicID()) { 2337 case Intrinsic::lifetime_start: 2338 case Intrinsic::lifetime_end: 2339 case Intrinsic::assume: 2340 // Not real use. 2341 continue; 2342 default: 2343 break; 2344 } 2345 LiveTmp.insert(Base); 2346 continue; 2347 } 2348 2349 // We know it is not address-taken, other operation should not happen. 2350 assert(false && "illegal operation on non-address-taken alloca"); 2351 } 2352 2353 LiveTmp.insert(V); 2354 } 2355 } 2356 } 2357 } 2358 2359 // Compute the def and kill of allocas. 2360 static void 2361 computeAllocaDefs(BasicBlock::iterator Begin, 2362 BasicBlock::iterator End, 2363 SetVector<Value *> &AllocaDefs, 2364 SetVector<Value *> &AllocaKills, 2365 DefiningValueMapTy &DVCache) { 2366 // Iterate forwards over the instructions, record the defs and kills 2367 // of allocas. 
2368 // Notes on the special cases about def and kill appearing in the same 2369 // block: 2370 // - kill after def: 2371 // Record the kill but don't remove it from def set, since we will 2372 // subtract the kill set anyway. And when a slot is initialized and 2373 // then killed in the same block, we don't lose information. 2374 // - def after kill: 2375 // The def overrides the kill, i.e. remove it from the kill set 2376 // (unless we see a kill again later). So we have the information 2377 // that the slot is initialized at the end of the block, even after 2378 // we subtract the kill set. 2379 for (auto &I : make_range(Begin, End)) { 2380 // skip Phi ? 2381 if (isa<PHINode>(I)) 2382 continue; 2383 2384 if (StoreInst *SI = dyn_cast<StoreInst>(&I)){ 2385 Value *V = SI->getPointerOperand(); 2386 if (Value *Base = isTrackedAlloca(V, DVCache)) { 2387 AllocaDefs.insert(Base); 2388 AllocaKills.remove(Base); 2389 } 2390 continue; 2391 } 2392 2393 if (CallInst *CI = dyn_cast<CallInst>(&I)){ 2394 if (hasStructRetAttr(CI)) { 2395 Value *V = CI->getOperand(0); 2396 if (Value *Base = isTrackedAlloca(V, DVCache)) { 2397 AllocaDefs.insert(Base); 2398 AllocaKills.remove(Base); 2399 } 2400 } 2401 if (Function *Fn = CI->getCalledFunction()) 2402 switch (Fn->getIntrinsicID()) { 2403 case Intrinsic::memmove: 2404 case Intrinsic::memcpy: 2405 case Intrinsic::memset: { 2406 // We're writing to the first arg. 2407 Value *V = CI->getOperand(0); 2408 if (Value *Base = isTrackedAlloca(V, DVCache)) { 2409 AllocaDefs.insert(Base); 2410 AllocaKills.remove(Base); 2411 } 2412 break; 2413 } 2414 case Intrinsic::lifetime_end: { 2415 Value *V = CI->getOperand(1); 2416 if (Value *Base = isTrackedAlloca(V, DVCache)) { 2417 AllocaKills.insert(Base); 2418 } 2419 break; 2420 } 2421 default: 2422 break; 2423 } 2424 continue; 2425 } 2426 2427 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 2428 if (hasStructRetAttr(II)) { 2429 Value *V = II->getOperand(0); 2430 if (Value *Base = isTrackedAlloca(V, DVCache)) { 2431 AllocaDefs.insert(Base); 2432 AllocaKills.remove(Base); 2433 } 2434 } 2435 continue; 2436 } 2437 } 2438 } 2439 2440 // Determine whether an alloca has its address taken. 2441 // We use different mechanisms to track the liveness of 2442 // address-taken and non-address-taken allocas. 2443 // Also keep track the location where the address is used. 2444 // The alloca needs to be live where its address is taken. 2445 static void 2446 determineAllocaAddrTaken(Function &F, 2447 SetVector<Value *> &AddrTakenAllocas, 2448 MapVector<BasicBlock *, SetVector<Value *>> &AllocaAddrUse, 2449 DefiningValueMapTy &DVCache) { 2450 // Use the metadata inserted by the FE. 2451 for (Instruction &I : F.getEntryBlock()) 2452 if (isa<AllocaInst>(I) && I.getMetadata("go_addrtaken") && 2453 hasPointer(I.getType()->getPointerElementType())) 2454 AddrTakenAllocas.insert(&I); 2455 2456 // The FE's addrtaken mark may be imprecise. Look for certain 2457 // operations in the IR to mark as addrtaken. 2458 // The address may be passed as argument to functions. We trust 2459 // the FE that if it is not marked as addrtaken, the function 2460 // won't hold its address. (for example, the equality function 2461 // of aggregate types.) 
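// For example (illustrative IR; %a, %b and %slot are hypothetical): either of
//
//   %t = select i1 %c, i8** %a, i8** %b     ; address flows through a select
//   store i8** %a, i8*** %slot              ; address is stored somewhere
//
// marks the allocas %a (and %b) as address-taken even without the frontend's
// "go_addrtaken" metadata, whereas plain loads and stores through %a alone do
// not.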
2462 for (Instruction &I : instructions(F)) { 2463 if (isa<PHINode>(I) || isa<SelectInst>(I)) 2464 // Phi/select could happen even it is not really addrtaken: 2465 // for example, an IR transformation like 2466 // if (cond) { x = a } else { x = b } 2467 // ==> 2468 // if (cond) { tmp = &a } else { tmp = &b }; x = *tmp 2469 // Things get complicated with them. For now, treat them as 2470 // address taken. 2471 for (Value *V : I.operands()) { 2472 if (!isHandledGCPointerType(V->getType())) 2473 continue; 2474 if (Value *Base = isTrackedAlloca(V, DVCache)) { 2475 AddrTakenAllocas.insert(Base); 2476 AllocaAddrUse[I.getParent()].insert(Base); 2477 } 2478 } 2479 else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { 2480 // If the address of a slot is stored, it must be addrtaken. 2481 // In most cases the FE marks it. One exception is the array 2482 // holding the ... args. 2483 // TODO: maybe we should fix the FE? 2484 Value *V = SI->getValueOperand(); 2485 if (isHandledGCPointerType(V->getType())) 2486 if (Value *Base = isTrackedAlloca(V, DVCache)) { 2487 AddrTakenAllocas.insert(Base); 2488 AllocaAddrUse[I.getParent()].insert(Base); 2489 } 2490 } 2491 } 2492 } 2493 2494 static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp, 2495 DefiningValueMapTy &DVCache) { 2496 for (BasicBlock *Succ : successors(BB)) { 2497 for (auto &I : *Succ) { 2498 PHINode *PN = dyn_cast<PHINode>(&I); 2499 if (!PN) 2500 break; 2501 2502 Value *V = PN->getIncomingValueForBlock(BB); 2503 assert(!isUnhandledGCPointerType(V->getType()) && 2504 "unexpected value type"); 2505 if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) { 2506 if (isAlloca(V, DVCache)) 2507 // Alloca is tracked separately. (It is a Phi arg so it 2508 // must be address-taken.) 2509 continue; 2510 LiveTmp.insert(V); 2511 } 2512 } 2513 } 2514 } 2515 2516 static SetVector<Value *> computeKillSet(BasicBlock *BB, DefiningValueMapTy &DVCache) { 2517 SetVector<Value *> KillSet; 2518 for (Instruction &I : *BB) { 2519 if (isHandledGCPointerType(I.getType())) 2520 KillSet.insert(&I); 2521 } 2522 return KillSet; 2523 } 2524 2525 #ifndef NDEBUG 2526 /// Check that the items in 'Live' dominate 'TI'. This is used as a basic 2527 /// sanity check for the liveness computation. 2528 static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live, 2529 Instruction *TI, bool TermOkay = false) { 2530 for (Value *V : Live) { 2531 if (auto *I = dyn_cast<Instruction>(V)) { 2532 // The terminator can be a member of the LiveOut set. LLVM's definition 2533 // of instruction dominance states that V does not dominate itself. As 2534 // such, we need to special case this to allow it. 2535 if (TermOkay && TI == I) 2536 continue; 2537 assert(DT.dominates(I, TI) && 2538 "basic SSA liveness expectation violated by liveness analysis"); 2539 } 2540 } 2541 } 2542 2543 /// Check that all the liveness sets used during the computation of liveness 2544 /// obey basic SSA properties. This is useful for finding cases where we miss 2545 /// a def. 2546 static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data, 2547 BasicBlock &BB) { 2548 checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator()); 2549 checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true); 2550 checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator()); 2551 } 2552 #endif 2553 2554 // For initialization of an aggregate-typed slot, check whether the 2555 // whole storage is initialized before we reach a statepoint, and 2556 // insert zeroing if not. 
2557 // Normally the FE has lifted calls out of the initialization sequence. 2558 // But they may occur due to optimizations, for example, 2559 // type A struct { ...; b B; ... } 2560 // type B struct { ... } 2561 // a := A{ ..., b: SomeB(), ... } 2562 // The FE generates something like 2563 // %a = alloca A 2564 // %tmp = alloca B 2565 // call SomeB(%tmp) // as outgoing arg 2566 // initialize part of a 2567 // call memmove(gep %a, %tmp) 2568 // initialize the rest of a 2569 // The memmove may be optimized out, with direct store to A, as 2570 // %a = alloca A 2571 // initialize part of a 2572 // call SomeB(gep %a) 2573 // initialize the rest of a 2574 // a is live at the call site, but not fully initialized. 2575 // We need to make sure a doesn't contain bad pointers. 2576 // TODO: this function is a little too conservative (see below). 2577 // TODO: instead of zeroing, maybe we can record only the part 2578 // of A that is live? 2579 static void 2580 checkStoreSize(Value *V, BasicBlock &BB, const DataLayout &DL, 2581 SetVector<Value *> &ToZero, 2582 DefiningValueMapTy &DVCache) { 2583 unsigned PtrSize = DL.getPointerSize(); 2584 unsigned Size = DL.getTypeStoreSize(V->getType()->getPointerElementType()); 2585 if (Size <= PtrSize) 2586 return; 2587 2588 // We simply add the sizes of all stores in the block, assuming 2589 // no overlapping stores (which are silly). 2590 unsigned StoreSize = 0; 2591 for (Instruction &I : BB) { 2592 if (StoreInst *SI = dyn_cast<StoreInst>(&I)) { 2593 Value *Ptr = SI->getPointerOperand(); 2594 if (isTrackedAlloca(Ptr, DVCache) == V) 2595 StoreSize += DL.getTypeStoreSize(SI->getValueOperand()->getType()); 2596 } else if (CallInst *CI = dyn_cast<CallInst>(&I)) { 2597 if (hasStructRetAttr(CI)) { 2598 Value *Ptr = CI->getOperand(0); 2599 if (isTrackedAlloca(Ptr, DVCache) == V) 2600 StoreSize += DL.getTypeStoreSize(Ptr->getType()->getPointerElementType()); 2601 } 2602 if (Function *Fn = CI->getCalledFunction()) 2603 switch (Fn->getIntrinsicID()) { 2604 case Intrinsic::memmove: 2605 case Intrinsic::memcpy: 2606 case Intrinsic::memset: { 2607 // We're writing to the first arg. The third arg is size. 2608 Value *Ptr = CI->getOperand(0); 2609 if (isTrackedAlloca(Ptr, DVCache) == V) { 2610 ConstantInt *Len = 2611 dyn_cast<ConstantInt>(cast<MemIntrinsic>(CI)->getLength()); 2612 if (Len) 2613 StoreSize += Len->getZExtValue(); 2614 } 2615 break; 2616 } 2617 default: 2618 break; 2619 } 2620 } else if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) { 2621 if (hasStructRetAttr(II)) { 2622 Value *Ptr = II->getOperand(0); 2623 if (isTrackedAlloca(Ptr, DVCache) == V) 2624 if (DL.getTypeStoreSize(Ptr->getType()->getPointerElementType()) + PtrSize - 1 >= Size) 2625 // We are storing the whole type 2626 return; 2627 2628 // Othersize we may have stored pointers into the alloca, which 2629 // need to be live, but it is not completely initialized. We need 2630 // to zero it. 2631 // TODO: no need to zero if all previous stores are scalars. 2632 } 2633 } 2634 2635 // We only care about pointers, so it's safe to round up to a pointer size. 2636 // TODO: things with more than a simple padding may still be false positive. 2637 // TODO: if the missing fields are all scalars, no need to zero. 2638 if (StoreSize + PtrSize - 1 >= Size) 2639 return; // early return if we have stored enough. 2640 } 2641 2642 // Incomplete initialization, needs zeroing. 
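// Worked example (assuming 8-byte pointers): for %a = alloca { i8*, i64, i8* }
// we have Size = 24. If the block stores only the first two fields, StoreSize
// is 16 and 16 + 8 - 1 = 23 < 24, so %a is queued for zeroing below; if the
// last field is stored as well, StoreSize is 24, 24 + 8 - 1 >= 24, and the
// early return above already fired.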
2643 if (StoreSize + PtrSize - 1 < Size) 2644 ToZero.insert(V); 2645 } 2646 2647 static void computeLiveInValues(DominatorTree &DT, Function &F, 2648 GCPtrLivenessData &Data, 2649 SetVector<Value *> &AddrTakenAllocas, 2650 SetVector<Value *> &ToZero, 2651 SetVector<Value *> &BadLoads, 2652 DefiningValueMapTy &DVCache) { 2653 MapVector<BasicBlock *, SetVector<Value *>> AllocaAddrUse; 2654 determineAllocaAddrTaken(F, AddrTakenAllocas, AllocaAddrUse, DVCache); 2655 if (PrintLiveSet) { 2656 dbgs() << "AddrTakenAllocas:\n"; 2657 printLiveSet(AddrTakenAllocas); 2658 } 2659 2660 // Seed the liveness for each individual block 2661 for (BasicBlock &BB : F) { 2662 Data.KillSet[&BB] = computeKillSet(&BB, DVCache); 2663 Data.LiveSet[&BB].clear(); 2664 computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], AddrTakenAllocas, DVCache); 2665 computeAllocaDefs(BB.begin(), BB.end(), Data.AllocaDefSet[&BB], Data.AllocaKillSet[&BB], DVCache); 2666 Data.AllocaDefAny[&BB] = Data.AllocaDefSet[&BB]; 2667 Data.AllocaDefAny[&BB].set_subtract(Data.AllocaKillSet[&BB]); 2668 Data.LiveOut[&BB] = SetVector<Value *>(); 2669 computeLiveOutSeed(&BB, Data.LiveOut[&BB], DVCache); 2670 2671 #ifndef NDEBUG 2672 for (Value *Kill : Data.KillSet[&BB]) 2673 assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill"); 2674 #endif 2675 } 2676 2677 // Propagate Alloca def any until stable. 2678 bool changed = true; 2679 again: 2680 while (changed) { 2681 changed = false; 2682 for (BasicBlock &BB : F) { 2683 unsigned OldSize = Data.AllocaDefAny[&BB].size(); 2684 for (BasicBlock *Pred : predecessors(&BB)) 2685 Data.AllocaDefAny[&BB].set_union(Data.AllocaDefAny[Pred]); 2686 Data.AllocaDefAny[&BB].set_subtract(Data.AllocaKillSet[&BB]); 2687 if (Data.AllocaDefAny[&BB].size() != OldSize) 2688 changed = true; 2689 } 2690 } 2691 2692 // When a slot's address is taken, it can be live any point after it. 2693 // It can also be initialized "indirectly", like 2694 // tmp = phi(&a, &b) 2695 // *tmp = ... 2696 // computeAllocaDefs doesn't see this initialization. 2697 // It can be initialized some time later, or never. We don't know for 2698 // sure. The slot needs to be live. And we need to pre-zero it, if we 2699 // don't otherwise know it is initialized. 2700 if (!AllocaAddrUse.empty()) { 2701 for (BasicBlock &BB : F) { 2702 AllocaAddrUse[&BB].set_subtract(Data.AllocaKillSet[&BB]); 2703 for (Value *V : AllocaAddrUse[&BB]) 2704 if (!Data.AllocaDefAny[&BB].count(V)) { 2705 Data.AllocaDefAny[&BB].insert(V); 2706 ToZero.insert(V); 2707 changed = true; 2708 } 2709 } 2710 AllocaAddrUse.clear(); 2711 if (changed) 2712 goto again; // re-propagate alloca def any 2713 } 2714 2715 // Propagate Alloca def all until stable. 2716 for (BasicBlock &BB : F) 2717 Data.AllocaDefAll[&BB] = Data.AllocaDefAny[&BB]; 2718 changed = true; 2719 while (changed) { 2720 changed = false; 2721 for (BasicBlock &BB : F) { 2722 auto NotDefAll = [&](Value *V){ 2723 if (Data.AllocaDefSet[&BB].count(V) != 0) 2724 return false; 2725 for (BasicBlock *Pred : predecessors(&BB)) 2726 if (Data.AllocaDefAll[Pred].count(V) == 0) { 2727 if (PrintLiveSet) 2728 dbgs() << ">>> removing " << V->getName() << " from " << 2729 BB.getName() << " DefAll; pred = " << 2730 Pred->getName() << "\n"; 2731 return true; 2732 } 2733 return false; 2734 }; 2735 if (Data.AllocaDefAll[&BB].remove_if(NotDefAll)) 2736 changed = true; 2737 } 2738 } 2739 2740 const DataLayout &DL = F.getParent()->getDataLayout(); 2741 2742 // An alloca is live only after it is initialized. 
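// Illustrative example of the two propagations above (hypothetical blocks;
// %a is an address-taken slot tracked here):
//
//          entry
//         /     \
//      left     right         only "left" stores to %a
//         \     /
//          merge
//
//   AllocaDefAny[merge] = union over preds (minus kills)    = { %a }
//   AllocaDefAll[merge] = only defs present on all paths in = { }
//
// so %a is maybe-but-not-surely initialized at "merge"; further down it lands
// in the ambiguously-live set (DefAny minus DefAll) and is added to ToZero so
// the GC never scans junk in it.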
2743 // It is initialized in a block if it is defined there and not defined 2744 // in all of the predecessors (or there is no predecessors). 2745 for (BasicBlock &BB : F) { 2746 for (Value *V : Data.AllocaDefSet[&BB]) { 2747 bool Init = false; 2748 if (&BB == &F.getEntryBlock()) 2749 Init = true; 2750 for (BasicBlock *Pred : predecessors(&BB)) 2751 if (!Data.AllocaDefAll[Pred].count(V)) { 2752 Init = true; 2753 break; 2754 } 2755 if (!Init) 2756 continue; 2757 if (!AddrTakenAllocas.count(V)) { // addr-taken alloca is tracked separately 2758 Data.KillSet[&BB].insert(V); 2759 Data.LiveSet[&BB].remove(V); 2760 } 2761 2762 // If it is incomplete initialization, it needs zeroing. 2763 checkStoreSize(V, BB, DL, ToZero, DVCache); 2764 } 2765 } 2766 2767 SmallSetVector<BasicBlock *, 32> Worklist; 2768 for (BasicBlock &BB : F) { 2769 Data.LiveIn[&BB] = Data.LiveSet[&BB]; 2770 Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]); 2771 Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]); 2772 if (!Data.LiveIn[&BB].empty()) 2773 Worklist.insert(pred_begin(&BB), pred_end(&BB)); 2774 } 2775 2776 // Propagate liveness until stable 2777 while (!Worklist.empty()) { 2778 BasicBlock *BB = Worklist.pop_back_val(); 2779 2780 // Compute our new liveout set, then exit early if it hasn't changed despite 2781 // the contribution of our successor. 2782 SetVector<Value *> LiveOut = Data.LiveOut[BB]; 2783 const auto OldLiveOutSize = LiveOut.size(); 2784 for (BasicBlock *Succ : successors(BB)) { 2785 assert(Data.LiveIn.count(Succ)); 2786 LiveOut.set_union(Data.LiveIn[Succ]); 2787 } 2788 // assert OutLiveOut is a subset of LiveOut 2789 if (OldLiveOutSize == LiveOut.size()) { 2790 // If the sets are the same size, then we didn't actually add anything 2791 // when unioning our successors LiveIn. Thus, the LiveIn of this block 2792 // hasn't changed. 2793 continue; 2794 } 2795 Data.LiveOut[BB] = LiveOut; 2796 2797 // Apply the effects of this basic block 2798 SetVector<Value *> LiveTmp = LiveOut; 2799 LiveTmp.set_union(Data.LiveSet[BB]); 2800 LiveTmp.set_subtract(Data.KillSet[BB]); 2801 2802 assert(Data.LiveIn.count(BB)); 2803 const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB]; 2804 // assert: OldLiveIn is a subset of LiveTmp 2805 if (OldLiveIn.size() != LiveTmp.size()) { 2806 Data.LiveIn[BB] = LiveTmp; 2807 Worklist.insert(pred_begin(BB), pred_end(BB)); 2808 } 2809 } // while (!Worklist.empty()) 2810 2811 // Find the bad loads, i.e. loads from uninitialized slots. 2812 // See also the comment in function insertParsePoints. 2813 for (Instruction &I : instructions(F)) 2814 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { 2815 Value *V = LI->getPointerOperand(); 2816 if (Value *Base = isTrackedAlloca(V, DVCache)) { 2817 BasicBlock *BB = LI->getParent(); 2818 // AllocaDefAll is the set of allocas initialized at the 2819 // end of BB. It doesn't include ones that are killed in 2820 // BB (as they don't reach the end). Add KillSet explicitly. 2821 if (!Data.AllocaDefAll[BB].count(Base) && 2822 !Data.AllocaKillSet[BB].count(Base) && 2823 !AddrTakenAllocas.count(Base)) { 2824 BadLoads.insert(LI); 2825 //dbgs() << "!!! load off uninitialized slot:\n\t" << 2826 // F.getName() << "\n\t" << *LI << "\n"; 2827 } 2828 } 2829 } 2830 2831 // Sanity check: live alloca must be initialized. 2832 // Due to the reason above, uninitialized slot may appear live, 2833 // as the bad load counts as a use. Remove them, as well as the 2834 // bad loads. 
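// For example (illustrative; the LICM case mentioned in insertParsePoints):
//
//   entry:  %a = alloca i8*                 ; no store before the loop
//           %pre = load i8*, i8** %a        ; hoisted out of the loop by LICM
//           br label %loop
//   loop:   %cur = phi i8* [ %pre, %entry ], [ %next, %loop ]
//           ...
//
// %pre reads garbage but is never actually consumed on the path where %a is
// still uninitialized; even so, %pre (and through it %a) would look live at
// safepoints in the loop. %pre goes into BadLoads and is later replaced with
// null, and %a is removed from LiveOut by the loop below.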
2835 for (BasicBlock &BB : F) { 2836 auto NotDefAll = [&](Value *V){ 2837 if (isa<AllocaInst>(V)) { 2838 if (!Data.AllocaDefAll[&BB].count(V)) { 2839 //dbgs() << "!!! alloca live but not initialized:\n\t" << 2840 // F.getName() << " " << BB.getName() << "\n" << *V << "\n"; 2841 return true; 2842 } 2843 } 2844 return false; 2845 }; 2846 Data.LiveOut[&BB].remove_if(NotDefAll); 2847 Data.LiveOut[&BB].set_subtract(BadLoads); 2848 } 2849 2850 // After this point, we only care address-taken allocas. Remove the rest. 2851 for (BasicBlock &BB : F) { 2852 auto NotAddrTaken = [&](Value *V){ return !AddrTakenAllocas.count(V); }; 2853 Data.AllocaDefAny[&BB].remove_if(NotAddrTaken); 2854 2855 // AllocaDefAll doesn't really matter, because we subtract it below. 2856 // Update it just for printing. 2857 Data.AllocaDefAll[&BB].remove_if(NotAddrTaken); 2858 } 2859 2860 // Address-taken allocas initialized and not killed at the end of block is live-out. 2861 // We don't update live-in sets, since live-in is not used after this point. 2862 for (BasicBlock &BB : F) 2863 Data.LiveOut[&BB].set_union(Data.AllocaDefAny[&BB]); 2864 2865 // Record ambiguously live slots (AllocaDefAny - AllocaDefAll), which we need to zero. 2866 for (BasicBlock &BB : F) { 2867 if (PrintLiveSet) { 2868 dbgs() << BB.getName() << " AllocaDefAny:\n"; 2869 printLiveSet(Data.AllocaDefAny[&BB]); 2870 dbgs() << BB.getName() << " AllocaDefAll:\n"; 2871 printLiveSet(Data.AllocaDefAll[&BB]); 2872 } 2873 2874 // NOTE: this clobbers AllocaDefAny. Don't use it after this point. 2875 Data.AllocaDefAny[&BB].set_subtract(Data.AllocaDefAll[&BB]); 2876 ToZero.set_union(Data.AllocaDefAny[&BB]); 2877 2878 if (PrintLiveSet) { 2879 dbgs() << BB.getName() << " ambiguously live:\n"; 2880 printLiveSet(Data.AllocaDefAny[&BB]); 2881 dbgs() << BB.getName() << " LiveOut:\n"; 2882 printLiveSet(Data.LiveOut[&BB]); 2883 } 2884 } 2885 2886 #ifndef NDEBUG 2887 // Sanity check our output against SSA properties. This helps catch any 2888 // missing kills during the above iteration. 2889 for (BasicBlock &BB : F) 2890 checkBasicSSA(DT, Data, BB); 2891 #endif 2892 } 2893 2894 // Compute the set of values live at Inst, store the result in Out. 2895 // 2896 // Side effect: in clobber-non-live mode, the clobbering instructions 2897 // are inserted here. 2898 static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, 2899 SetVector<Value *> &AddrTakenAllocas, 2900 StatepointLiveSetTy &Out, 2901 SetVector<Value *> &AllAllocas, 2902 DefiningValueMapTy &DVCache) { 2903 BasicBlock *BB = Inst->getParent(); 2904 2905 // Note: The copy is intentional and required 2906 assert(Data.LiveOut.count(BB)); 2907 SetVector<Value *> LiveOut = Data.LiveOut[BB]; 2908 2909 // We want to handle the statepoint itself oddly. It's 2910 // call result is not live (normal), nor are it's arguments 2911 // (unless they're used again later). 2912 // The statepoint is always an invoke instruction, which is the last 2913 // instruction in the block. The only thing it can initialize is its 2914 // result (passed directly, or indirectly as outgoing arg). 
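// For example (illustrative; @f and %slot are hypothetical): for an sret-style
// statepoint such as
//
//   invoke void @f({ i8*, i8* }* sret %slot, ...)
//
// %slot is written by the callee itself, so unless it was already live coming
// into this block, and provided the sret type covers the whole alloca, it is
// dropped from the live set for this safepoint just below: the GC must not
// scan it before @f has initialized it.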
2915 LiveOut.remove(Inst); 2916 if (InvokeInst *II = dyn_cast<InvokeInst>(Inst)) 2917 if (hasStructRetAttr(II)) { 2918 Value *Ptr = II->getOperand(0); 2919 Value *V = Ptr->stripPointerCasts(); 2920 const DataLayout &DL = Inst->getModule()->getDataLayout(); 2921 if (!Data.LiveIn[BB].count(V) && 2922 (DL.getTypeStoreSize(Ptr->getType()->getPointerElementType()) >= 2923 DL.getTypeStoreSize(V->getType()->getPointerElementType()))) 2924 LiveOut.remove(V); 2925 } 2926 2927 // Clobber all non-live allocas. 2928 if (ClobberNonLive) { 2929 SetVector<Value *> ToClobber(AllAllocas); 2930 ToClobber.set_subtract(LiveOut); 2931 if (!ToClobber.empty()) { 2932 IRBuilder<> Builder(Inst); 2933 Type *Int8Ty = IntegerType::get(Inst->getModule()->getContext(), 8); 2934 const DataLayout &DL = Inst->getModule()->getDataLayout(); 2935 Value *Bad = ConstantInt::get(Int8Ty, 0xff); 2936 for (Value *Alloca : ToClobber) { 2937 unsigned Siz = 2938 DL.getTypeStoreSize(Alloca->getType()->getPointerElementType()); 2939 Builder.CreateMemSet(Alloca, Bad, Siz, MaybeAlign(0)); 2940 //dbgs() << "clobber " << *Alloca << " at " << *Inst << "\n"; 2941 } 2942 } 2943 } 2944 2945 Out.insert(LiveOut.begin(), LiveOut.end()); 2946 } 2947 2948 // Remove write barriers for stack writes, for 2949 // 1. write barriers are unnecessary for stack writes, 2950 // 2. if a write barrier is applied to a write on an uninitialized slot, 2951 // the GC may see the bad content. 2952 // This is not the best way to do it: it doesn't remove the conditional 2953 // branch that tests if the write barrier is on. 2954 // It may be better that we insert write barriers not that early. 2955 // 2956 // This function is not really related to statepoints. It is here so 2957 // it can reuse the base pointer calculations (and caching). 2958 static void 2959 fixStackWriteBarriers(Function &F, DefiningValueMapTy &DVCache) { 2960 SmallSet<Instruction *, 8> ToDel; 2961 2962 for (Instruction &I : instructions(F)) { 2963 if (auto *CI = dyn_cast<CallInst>(&I)) 2964 if (Function *Callee = CI->getCalledFunction()) { 2965 if (Callee->getName().equals("runtime.gcWriteBarrier")) { 2966 // gcWriteBarrier(dst, val) 2967 // there is an extra "nest" argument. 2968 Value *Dst = CI->getArgOperand(1), *Val = CI->getArgOperand(2); 2969 if (!isAlloca(Dst, DVCache)) 2970 continue; 2971 IRBuilder<> Builder(CI); 2972 unsigned AS = Dst->getType()->getPointerAddressSpace(); 2973 Dst = Builder.CreateBitCast(Dst, 2974 PointerType::get(Val->getType(), AS)); 2975 Builder.CreateStore(Val, Dst); 2976 ToDel.insert(CI); 2977 } else if (Callee->getName().equals("runtime.typedmemmove")) { 2978 // typedmemmove(typ, dst, src) 2979 // there is an extra "nest" argument. 2980 Value *Dst = CI->getArgOperand(2), *Src = CI->getArgOperand(3); 2981 if (!isAlloca(Dst, DVCache)) 2982 continue; 2983 IRBuilder<> Builder(CI); 2984 // We should know the size at compile time, but at this stage I 2985 // don't know how to retrieve it. Load from the type descriptor 2986 // for now. The size is the first field. The optimizer should be 2987 // able to constant-fold it. 2988 Value *TD = CI->getArgOperand(1); 2989 Type *etyp = TD->getType()->getPointerElementType(); 2990 Value *GEP = Builder.CreateConstInBoundsGEP2_32( 2991 etyp, TD, 0, 0); 2992 Value *Siz = Builder.CreateLoad(etyp, GEP); 2993 llvm::MaybeAlign malgn(0); 2994 Builder.CreateMemMove(Dst, malgn, Src, malgn, Siz); 2995 ToDel.insert(CI); 2996 } 2997 } 2998 } 2999 3000 for (Instruction *I : ToDel) 3001 I->eraseFromParent(); 3002 }
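// Schematically (a hedged sketch; the Go runtime signatures and types are
// abbreviated, and the extra "nest" argument is shown as %n), when the
// destination is a stack slot the two rewrites above turn
//
//   call void @runtime.gcWriteBarrier(i8* nest %n, i8** %dst, i8* %val)
// into
//   store i8* %val, i8** %dst
//
// and
//
//   call void @runtime.typedmemmove(i8* nest %n, i8* %typ, i8* %dst, i8* %src)
// into
//   %siz = load i64, ...        ; size read from the first descriptor field
//   call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %siz, i1 false)
//
// so the write barrier machinery never exposes a possibly-uninitialized stack
// slot to the GC.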