github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/lib/Transforms/MemRefDataFlowOpt.cpp

//===- MemRefDataFlowOpt.cpp - MemRef DataFlow Optimization pass ------ -*-===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements a pass to forward memref stores to loads, thereby
// potentially getting rid of intermediate memrefs entirely.
// TODO(mlir-team): In the future, similar techniques could be used to eliminate
// dead memref stores and perform more complex forwarding when support for
// SSA scalars live out of 'affine.for'/'affine.if' statements is available.
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/Dominance.h"
#include "mlir/Analysis/Utils.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include <algorithm>

#define DEBUG_TYPE "memref-dataflow-opt"

using namespace mlir;

namespace {

// The store to load forwarding relies on four conditions:
//
// 1) there has to be a dependence from the store to the load satisfied at the
// block* immediately within the innermost loop enclosing both the load op and
// the store op,
//
// 2) the store op should dominate the load op,
//
// 3) among all candidate store ops that satisfy (1) and (2), if there exists a
// store op that postdominates all those that satisfy (1), such a store op is
// provably the last writer to the particular memref location being loaded from
// by the load op, and its store value can be forwarded to the load,
//
// 4) the load should touch a single location in the memref for a given
// iteration of the innermost loop enclosing both the store op and the load op.
//
// (* A dependence being satisfied at a block: a dependence that is satisfied by
// virtue of the destination operation appearing textually / lexically after
// the source operation within the body of an 'affine.for' operation; thus, a
// dependence is always either satisfied by a loop or by a block).
//
// The above conditions are simple to check, sufficient, and powerful for most
// cases in practice - conditions (1) and (3) are precise and necessary, while
// condition (2) is sufficient but not necessary (since it doesn't reason
// about loops that are guaranteed to execute at least once).
//
// TODO(mlir-team): more forwarding can be done when support for
// loop/conditional live-out SSA values is available.
// TODO(mlir-team): do general dead store elimination for memrefs. This pass
// currently eliminates stores only if no other loads/uses (other than dealloc)
// remain.
//
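// As an illustration (pseudo-IR in the loose style of the comments in this
// file; names and exact affine syntax are approximate), forwarding applies in
// a case like:
//
//   affine.for %i = 0 to 16 {
//     store %v, %A[%i]
//     %x = load %A[%i]
//   }
//
// The store dominates the load, their dependence is satisfied at the block
// immediately inside the loop, and the load reads a single location per
// iteration, so %x can be replaced by %v; if no uses of %A other than stores
// and a dealloc remain afterwards, the memref itself can be erased.
//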
struct MemRefDataFlowOpt : public FunctionPass<MemRefDataFlowOpt> {
  void runOnFunction() override;

  void forwardStoreToLoad(AffineLoadOp loadOp);

  // The set of memrefs that are potentially dead / could be eliminated.
  SmallPtrSet<Value *, 4> memrefsToErase;
  // Load ops whose results were replaced by those forwarded from stores.
  std::vector<Operation *> loadOpsToErase;

  DominanceInfo *domInfo = nullptr;
  PostDominanceInfo *postDomInfo = nullptr;
};

} // end anonymous namespace

/// Creates a pass to perform optimizations relying on memref dataflow such as
/// store to load forwarding, elimination of dead stores, and dead allocs.
std::unique_ptr<FunctionPassBase> mlir::createMemRefDataFlowOptPass() {
  return std::make_unique<MemRefDataFlowOpt>();
}
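
// A minimal usage sketch (assumed pass-manager API for this snapshot; the
// exact entry point may differ):
//
//   pm.addPass(createMemRefDataFlowOptPass());  // 'pm' is an mlir::PassManager
//
// The pass is also exposed to tools through the "memref-dataflow-opt"
// registration at the bottom of this file.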

// This is a straightforward implementation not optimized for speed. Optimize
// this in the future if needed.
void MemRefDataFlowOpt::forwardStoreToLoad(AffineLoadOp loadOp) {
  Operation *lastWriteStoreOp = nullptr;
  Operation *loadOpInst = loadOp.getOperation();

  // First pass over the use list to get the minimum number of surrounding
  // loops common between the load op and the store op, with the min taken
  // across all store ops.
  SmallVector<Operation *, 8> storeOps;
  unsigned minSurroundingLoops = getNestingDepth(*loadOpInst);
  for (auto *user : loadOp.getMemRef()->getUsers()) {
    auto storeOp = dyn_cast<AffineStoreOp>(user);
    if (!storeOp)
      continue;
    auto *storeOpInst = storeOp.getOperation();
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
    minSurroundingLoops = std::min(nsLoops, minSurroundingLoops);
    storeOps.push_back(storeOpInst);
  }

  unsigned loadOpDepth = getNestingDepth(*loadOpInst);

  // 1. Check if there is a dependence satisfied at depth equal to the depth
  // of the loop body of the innermost common surrounding loop of the storeOp
  // and loadOp.
  // The list of store op candidates for forwarding - need to satisfy the
  // conditions listed at the top.
  SmallVector<Operation *, 8> fwdingCandidates;
  // Store ops that have a dependence into the load (even if they aren't
  // forwarding candidates). Each forwarding candidate will be checked for a
  // post-dominance on these. 'fwdingCandidates' are a subset of depSrcStores.
  SmallVector<Operation *, 8> depSrcStores;
  for (auto *storeOpInst : storeOps) {
    MemRefAccess srcAccess(storeOpInst);
    MemRefAccess destAccess(loadOpInst);
    FlatAffineConstraints dependenceConstraints;
    unsigned nsLoops = getNumCommonSurroundingLoops(*loadOpInst, *storeOpInst);
    // Dependences at loop depth <= minSurroundingLoops do NOT matter.
    for (unsigned d = nsLoops + 1; d > minSurroundingLoops; d--) {
      DependenceResult result = checkMemrefAccessDependence(
          srcAccess, destAccess, d, &dependenceConstraints,
          /*dependenceComponents=*/nullptr);
      if (!hasDependence(result))
        continue;
      depSrcStores.push_back(storeOpInst);
      // Check if this store is a candidate for forwarding; we only forward if
      // the dependence from the store is carried by the *body* of the
      // innermost common surrounding loop. As an example, this filters out
      // cases like:
      // affine.for %i0
      //   affine.for %i1
      //     %idx = affine.apply (d0) -> (d0 + 1) (%i0)
      //     store %A[%idx]
      //     load %A[%i0]
      //
      if (d != nsLoops + 1)
        break;

      // 2. The store has to dominate the load op to be a candidate. This is
      // not strictly a necessary condition since dominance isn't a
      // prerequisite for a memref element store to reach a load, but it is
      // sufficient and reasonably powerful in practice.
      if (!domInfo->dominates(storeOpInst, loadOpInst))
        break;

      // Finally, forwarding is only possible if the load touches a single
      // location in the memref across the enclosing loops *not* common with
      // the store. This filters out cases like:
      // for (i ...)
      //   a[i] = ...
      //   for (j ...)
      //      ... = a[j]
      // If storeOpInst and loadOpInst are at the same nesting depth, the load
      // op is trivially loading from a single location at that depth, so there
      // is no need to call isRangeOneToOne.
      if (getNestingDepth(*storeOpInst) < loadOpDepth) {
        MemRefRegion region(loadOpInst->getLoc());
        region.compute(loadOpInst, nsLoops);
        if (!region.getConstraints()->isRangeOneToOne(
                /*start=*/0, /*limit=*/loadOp.getMemRefType().getRank()))
          break;
      }

      // After all these conditions, we have a candidate for forwarding!
      fwdingCandidates.push_back(storeOpInst);
      break;
    }
  }

  // Note: this can be implemented in a cleaner way with postdominator tree
  // traversals. Consider this for the future if needed.
  for (auto *storeOpInst : fwdingCandidates) {
    // 3. Of all the store ops that meet the above criteria, the store that
    // postdominates all 'depSrcStores' (if such a store exists) is the unique
    // store providing the value to the load, i.e., provably the last writer
    // to that memref loc.
    if (llvm::all_of(depSrcStores, [&](Operation *depStore) {
          return postDomInfo->postDominates(storeOpInst, depStore);
        })) {
      lastWriteStoreOp = storeOpInst;
      break;
    }
  }
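  // For illustration (pseudo-IR sketch in the style of the comments above):
  //
  //   store %A[%i]      // S1: dominates the load
  //   affine.if ... {
  //     store %A[%i]    // S2: does not dominate the load
  //   }
  //   load %A[%i]
  //
  // S1 is a forwarding candidate but does not postdominate S2, which is also
  // in 'depSrcStores', so no forwarding is performed: S2 may be the actual
  // last writer to %A[%i].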
  // TODO: optimization for the future: store ops that are determined to be
  // postdominated above can be recorded and skipped on subsequent iterations
  // of the loop above, since they can never postdominate everything.

  if (!lastWriteStoreOp)
    return;

  // Perform the actual store to load forwarding.
  Value *storeVal = cast<AffineStoreOp>(lastWriteStoreOp).getValueToStore();
  loadOp.replaceAllUsesWith(storeVal);
  // Record the memref for a later sweep to optimize away.
  memrefsToErase.insert(loadOp.getMemRef());
  // Record this to erase later.
  loadOpsToErase.push_back(loadOpInst);
}

void MemRefDataFlowOpt::runOnFunction() {
  // Only supports single block functions at the moment.
  FuncOp f = getFunction();
  if (f.getBlocks().size() != 1) {
    markAllAnalysesPreserved();
    return;
  }

  domInfo = &getAnalysis<DominanceInfo>();
  postDomInfo = &getAnalysis<PostDominanceInfo>();

  loadOpsToErase.clear();
  memrefsToErase.clear();

  // Walk all loads and perform store to load forwarding.
  f.walk([&](AffineLoadOp loadOp) { forwardStoreToLoad(loadOp); });

  // Erase all load ops whose results were replaced with forwarded store values.
  for (auto *loadOp : loadOpsToErase) {
    loadOp->erase();
  }

  // Check if the store fwd'ed memrefs are now left with only stores and can
  // thus be completely deleted. Note: the canonicalize pass should be able
  // to do this as well, but we'll do it here since we collected these anyway.
  for (auto *memref : memrefsToErase) {
    // If the memref hasn't been alloc'ed in this function, skip.
    Operation *defInst = memref->getDefiningOp();
    if (!defInst || !isa<AllocOp>(defInst))
      // TODO(mlir-team): if the memref was returned by a 'call' operation, we
      // could still erase it if the call had no side-effects.
      continue;
    if (llvm::any_of(memref->getUsers(), [&](Operation *ownerInst) {
          return (!isa<AffineStoreOp>(ownerInst) && !isa<DeallocOp>(ownerInst));
        }))
      continue;

    // Erase all stores, the dealloc, and the alloc on the memref.
    for (auto *user : llvm::make_early_inc_range(memref->getUsers()))
      user->erase();
    defInst->erase();
  }
}

static PassRegistration<MemRefDataFlowOpt>
    pass("memref-dataflow-opt", "Perform store/load forwarding for memrefs");
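
// Illustrative invocation through the mlir-opt driver, using the flag
// registered above (the input file name is a placeholder):
//
//   mlir-opt -memref-dataflow-opt input.mlir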