github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/lib/Transforms/MemRefDataFlowOpt.cpp

//===- MemRefDataFlowOpt.cpp - MemRef DataFlow Optimization pass ------ -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements a pass to forward memref stores to loads, thereby
// potentially getting rid of intermediate memref's entirely.
// TODO(mlir-team): In the future, similar techniques could be used to eliminate
// dead memref store's and perform more complex forwarding when support for
// SSA scalars live out of 'affine.for'/'affine.if' statements is available.
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Dominance.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>

#define DEBUG_TYPE "memref-dataflow-opt"

using namespace mlir;

namespace {

// The store to load forwarding relies on four conditions:
//
// 1) there has to be a dependence from the store to the load satisfied at the
// block* immediately within the innermost loop enclosing both the load op and
// the store op,
//
// 2) the store op should dominate the load op,
//
// 3) among all candidate store op's that satisfy (1) and (2), if there exists a
// store op that postdominates all those that satisfy (1), such a store op is
// provably the last writer to the particular memref location being loaded from
// by the load op, and its store value can be forwarded to the load, and
//
// 4) the load should touch a single location in the memref for a given
// iteration of the innermost loop enclosing both the store op and the load op.
//
// (* A dependence being satisfied at a block: a dependence that is satisfied by
// virtue of the destination operation appearing textually / lexically after
// the source operation within the body of an 'affine.for' operation; thus, a
// dependence is always either satisfied by a loop or by a block).
//
// The above conditions are simple to check, sufficient, and powerful for most
// cases in practice - conditions (1) and (3) are precise and necessary, while
// condition (2) is a sufficient one but not necessary (since it doesn't reason
// about loops that are guaranteed to execute at least once).
//
// TODO(mlir-team): more forwarding can be done when support for
// loop/conditional live-out SSA values is available.
// TODO(mlir-team): do general dead store elimination for memref's.
// This pass currently only eliminates the stores if no other loads/uses (other
// than dealloc) remain.
//
struct MemRefDataFlowOpt : public FunctionPass<MemRefDataFlowOpt> {
  void runOnFunction() override;

  void forwardStoreToLoad(AffineLoadOp loadOp);

  // A list of memref's that are potentially dead / could be eliminated.
  SmallPtrSet<Value *, 4> memrefsToErase;
  // Load op's whose results were replaced by those forwarded from stores.
  std::vector<Operation *> loadOpsToErase;

  DominanceInfo *domInfo = nullptr;
  PostDominanceInfo *postDomInfo = nullptr;
};

} // end anonymous namespace

/// Creates a pass to perform optimizations relying on memref dataflow such as
/// store to load forwarding, elimination of dead stores, and dead allocs.
std::unique_ptr<FunctionPassBase> mlir::createMemRefDataFlowOptPass() {
  return std::make_unique<MemRefDataFlowOpt>();
}

// This is a straightforward implementation not optimized for speed. Optimize
// this in the future if needed.
void MemRefDataFlowOpt::forwardStoreToLoad(AffineLoadOp loadOp) {
  Operation *lastWriteStoreOp = nullptr;
  Operation *loadOpInst = loadOp.getOperation();

  // First pass over the use list to get the minimum number of surrounding
  // loops common between the load op and the store op, with min taken across
  // all store ops.
  SmallVector<Operation *, 8> storeOps;
  unsigned minSurroundingLoops = getNestingDepth(*loadOpInst);
  for (auto *user : loadOp.getMemRef()->getUsers()) {
    auto storeOp = dyn_cast<AffineStoreOp>(user);
    if (!storeOp)
      continue;
    auto *storeOpInst = storeOp.getOperation();
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
    minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
    storeOps.push_back(storeOpInst);
  }

  unsigned loadOpDepth = getNestingDepth(*loadOpInst);

  // 1. Check if there is a dependence satisfied at depth equal to the depth
  // of the loop body of the innermost common surrounding loop of the storeOp
  // and loadOp.
  // The list of store op candidates for forwarding - they need to satisfy the
  // conditions listed at the top.
  SmallVector<Operation *, 8> fwdingCandidates;
  // Store ops that have a dependence into the load (even if they aren't
  // forwarding candidates). Each forwarding candidate will be checked for
  // post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores.
  SmallVector<Operation *, 8> depSrcStores;
  for (auto *storeOpInst : storeOps) {
    MemRefAccess srcAccess(storeOpInst);
    MemRefAccess destAccess(loadOpInst);
    FlatAffineConstraints dependenceConstraints;
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
    // Dependences at loop depth <= minSurroundingLoops do NOT matter.
    for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
      DependenceResult result = checkMemrefAccessDependence(
          srcAccess, destAccess, d, &dependenceConstraints,
          /*dependenceComponents=*/nullptr);
      if (!hasDependence(result))
        continue;
      depSrcStores.push_back(storeOpInst);
      // Check if this store is a candidate for forwarding; we only forward if
      // the dependence from the store is carried by the *body* of the
      // innermost common surrounding loop.
      // As an example, this filters out cases like:
      // affine.for %i0
      //   affine.for %i1
      //     %idx = affine.apply (d0) -> (d0 + 1) (%i0)
      //     store %A[%idx]
      //     load %A[%i0]
      //
      if (d != nsLoops + 1)
        break;

      // 2. The store has to dominate the load op to be a candidate. This is
      // not strictly a necessary condition since dominance isn't a
      // prerequisite for a memref element store to reach a load, but this is
      // sufficient and reasonably powerful in practice.
      if (!domInfo->dominates(storeOpInst, loadOpInst))
        break;

      // Finally, forwarding is only possible if the load touches a single
      // location in the memref across the enclosing loops *not* common with
      // the store. This filters out cases like:
      // for (i ...)
      //   a[i] = ...
      // for (j ...)
      //   ... = a[j]
      // If the store op and the load op are at the same nesting depth, the
      // load op is trivially loading from a single location at that depth, so
      // there isn't a need to call isRangeOneToOne.
      if (getNestingDepth(*storeOpInst) < loadOpDepth) {
        MemRefRegion region(loadOpInst->getLoc());
        region.compute(loadOpInst, nsLoops);
        if (!region.getConstraints()->isRangeOneToOne(
                /*start=*/0, /*limit=*/loadOp.getMemRefType().getRank()))
          break;
      }

      // After all these conditions, we have a candidate for forwarding!
      fwdingCandidates.push_back(storeOpInst);
      break;
    }
  }

  // Note: this can be implemented in a cleaner way with postdominator tree
  // traversals. Consider this for the future if needed.
  for (auto *storeOpInst : fwdingCandidates) {
    // 3. Of all the store op's that meet the above criteria, the store that
    // postdominates all 'depSrcStores' (if such a store exists) is the unique
    // store providing the value to the load, i.e., provably the last writer
    // to that memref loc.
    if (llvm::all_of(depSrcStores, [&](Operation *depStore) {
          return postDomInfo->postDominates(storeOpInst, depStore);
        })) {
      lastWriteStoreOp = storeOpInst;
      break;
    }
  }
  // TODO: optimization for future: those store op's that are determined to be
  // postdominated above can actually be recorded and skipped in the loop over
  // forwarding candidates above --- since they can never postdominate
  // everything.

  if (!lastWriteStoreOp)
    return;

  // Perform the actual store to load forwarding.
  Value *storeVal = cast<AffineStoreOp>(lastWriteStoreOp).getValueToStore();
  loadOp.replaceAllUsesWith(storeVal);
  // Record the memref for a later sweep to optimize away.
  memrefsToErase.insert(loadOp.getMemRef());
  // Record this to erase later.
  loadOpsToErase.push_back(loadOpInst);
}

void MemRefDataFlowOpt::runOnFunction() {
  // Only supports single block functions at the moment.
  FuncOp f = getFunction();
  if (f.getBlocks().size() != 1) {
    markAllAnalysesPreserved();
    return;
  }

  domInfo = &getAnalysis<DominanceInfo>();
  postDomInfo = &getAnalysis<PostDominanceInfo>();

  loadOpsToErase.clear();
  memrefsToErase.clear();

  // Walk all load's and perform load/store forwarding.
  f.walk([&](AffineLoadOp loadOp) { forwardStoreToLoad(loadOp); });

  // Erase all load op's whose results were replaced with store fwd'ed ones.
  for (auto *loadOp : loadOpsToErase) {
    loadOp->erase();
  }

  // Check if the store fwd'ed memrefs are now left with only stores and can
  // thus be completely deleted.
  // Note: the canonicalize pass should be able to do this as well, but we'll
  // do it here since we collected these anyway.
  for (auto *memref : memrefsToErase) {
    // If the memref hasn't been alloc'ed in this function, skip.
    Operation *defInst = memref->getDefiningOp();
    if (!defInst || !isa<AllocOp>(defInst))
      // TODO(mlir-team): if the memref was returned by a 'call' operation, we
      // could still erase it if the call had no side-effects.
      continue;
    if (llvm::any_of(memref->getUsers(), [&](Operation *ownerInst) {
          return (!isa<AffineStoreOp>(ownerInst) && !isa<DeallocOp>(ownerInst));
        }))
      continue;

    // Erase all stores, the dealloc, and the alloc on the memref.
    for (auto *user : llvm::make_early_inc_range(memref->getUsers()))
      user->erase();
    defInst->erase();
  }
}

static PassRegistration<MemRefDataFlowOpt>
    pass("memref-dataflow-opt", "Perform store/load forwarding for memrefs");
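
// ----------------------------------------------------------------------------
// Illustrative example: a minimal sketch of the rewriting this pass performs,
// assuming the affine/standard dialect syntax of this MLIR snapshot; the
// "foo.use" op below is a hypothetical placeholder for an arbitrary consumer.
// Given IR of the form
//
//   %m = alloc() : memref<10xf32>
//   affine.for %i0 = 0 to 10 {
//     %v0 = constant 1.0 : f32
//     affine.store %v0, %m[%i0] : memref<10xf32>
//     %v1 = affine.load %m[%i0] : memref<10xf32>
//     "foo.use"(%v1) : (f32) -> ()
//   }
//   dealloc %m : memref<10xf32>
//
// the store dominates the load, the dependence between them is satisfied at
// the block inside %i0, and the load reads a single location per iteration,
// so uses of %v1 are replaced with %v0 and the load is erased. The memref %m
// is then left with only the store and the dealloc, so the alloc, store, and
// dealloc are erased as well. The pass is registered above under the name
// "memref-dataflow-opt" and can be exercised through mlir-opt with
// -memref-dataflow-opt.
// ----------------------------------------------------------------------------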