github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/lib/Analysis/LoopAnalysis.cpp

//===- LoopAnalysis.cpp - Misc loop analysis routines --------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/NestedMatcher.h"
#include "mlir/Analysis/VectorAnalysis.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/Dialect/VectorOps/VectorOps.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/Functional.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallString.h"
#include <type_traits>

using namespace mlir;

/// Returns the trip count of the loop as an affine expression if the latter is
/// expressible as an affine expression, and nullptr otherwise. The trip count
/// expression is simplified before returning. This method only utilizes map
/// composition to construct lower and upper bounds before computing the trip
/// count expressions.
// TODO(mlir-team): this should be moved into 'Transforms/' and be replaced by a
// pure analysis method relying on FlatAffineConstraints; the latter will also
// be more powerful (since both inequalities and equalities will be considered).
void mlir::buildTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *map,
    SmallVectorImpl<Value *> *tripCountOperands) {
  int64_t loopSpan;

  int64_t step = forOp.getStep();
  OpBuilder b(forOp.getOperation());

  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *map = b.getConstantAffineMap(ceilDiv(loopSpan, step));
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *map = AffineMap();
    return;
  }
  SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands());
  SmallVector<Value *, 4> ubOperands(forOp.getUpperBoundOperands());
  auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
  SmallVector<Value *, 4> ubs;
  ubs.reserve(ubMap.getNumResults());
  for (auto ubExpr : ubMap.getResults())
    ubs.push_back(b.create<AffineApplyOp>(
        forOp.getLoc(),
        b.getAffineMap(ubMap.getNumDims(), ubMap.getNumSymbols(), {ubExpr}),
        ubOperands));

  tripCountOperands->clear();
  tripCountOperands->reserve(1 + ubs.size());
  tripCountOperands->push_back(lb);
  tripCountOperands->append(ubs.begin(), ubs.end());

  SmallVector<AffineExpr, 4> tripCountExprs(ubs.size());
  for (unsigned i = 0, e = ubs.size(); i < e; i++)
    tripCountExprs[i] =
        (b.getAffineDimExpr(1 + i) - b.getAffineDimExpr(0)).ceilDiv(step);
  *map = b.getAffineMap(1 + ubs.size(), 0, tripCountExprs);

  fullyComposeAffineMapAndOperands(map, tripCountOperands);
  *map = simplifyAffineMap(*map);
  canonicalizeMapAndOperands(map, tripCountOperands);
  // Remove any affine.apply's that became dead as a result of composition,
  // simplification, and canonicalization above.
  for (auto *v : ubs)
    if (v->use_empty())
      v->getDefiningOp()->erase();
  if (lb.use_empty())
    lb.erase();
}

/// Returns the trip count of the loop if it's a constant, None otherwise. This
/// method uses affine expression analysis (in turn using getTripCount) and is
/// able to determine constant trip count in non-trivial cases.
// FIXME(mlir-team): this is really relying on buildTripCountMapAndOperands;
// being an analysis utility, it shouldn't. Replace with a version that just
// works with analysis structures (FlatAffineConstraints) and thus doesn't
// update the IR.
llvm::Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value *, 4> operands;
  AffineMap map;
  buildTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return None;

  // Take the min if all trip counts are constant.
  Optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      if (tripCount.hasValue())
        tripCount = std::min(tripCount.getValue(),
                             static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return None;
  }
  return tripCount;
}
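// A minimal usage sketch (illustrative only; the threshold constant below is
// hypothetical and nothing in this file calls this helper): a caller such as
// an unrolling pass could use getConstantTripCount to decide whether a loop
// is small enough to unroll fully.
LLVM_ATTRIBUTE_UNUSED static bool
isSmallConstantTripCountLoop(AffineForOp forOp) {
  constexpr uint64_t kFullUnrollThreshold = 8;
  // Only loops whose trip count folds to a constant qualify.
  llvm::Optional<uint64_t> tripCount = getConstantTripCount(forOp);
  return tripCount.hasValue() && tripCount.getValue() <= kFullUnrollThreshold;
}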
/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value *, 4> operands;
  AffineMap map;
  buildTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  Optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      uint64_t tripCount = constExpr.getValue();
      // 0-iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; return its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.hasValue())
      gcd = llvm::GreatestCommonDivisor64(gcd.getValue(), thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.hasValue() && "value expected per above logic");
  return gcd.getValue();
}

bool mlir::isAccessInvariant(Value *iv, Value *index) {
  assert(isForInductionVar(iv) &&
         "iv must be an AffineForOp induction variable");
  assert(index->getType().isa<IndexType>() && "index must be of IndexType");
  SmallVector<Operation *, 4> affineApplyOps;
  getReachableAffineApplyOps({index}, affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of Value pointer semantics.
    return index != iv;
  }

  if (affineApplyOps.size() > 1) {
    affineApplyOps[0]->emitRemark(
        "CompositionAffineMapsPass must have been run: there should be at "
        "most one AffineApplyOp, returning false conservatively.");
    return false;
  }

  auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  return !(AffineValueMap(composeOp).isFunctionOf(0, iv));
}

llvm::DenseSet<Value *>
mlir::getInvariantAccesses(Value *iv, llvm::ArrayRef<Value *> indices) {
  llvm::DenseSet<Value *> res;
  for (unsigned idx = 0, n = indices.size(); idx < n; ++idx) {
    auto *val = indices[idx];
    if (isAccessInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}
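// A minimal usage sketch (illustrative only; `loadOp` is assumed to be an
// affine.load nested somewhere under `forOp`): an access is fully
// loop-invariant along the loop's induction variable exactly when every one
// of its indices is invariant, which a caller could use as a hoisting
// heuristic.
LLVM_ATTRIBUTE_UNUSED static bool
isLoadInvariantAlongLoop(AffineForOp forOp, AffineLoadOp loadOp) {
  Value *iv = forOp.getInductionVar();
  SmallVector<Value *, 4> indices(loadOp.getIndices());
  for (auto *index : indices)
    if (!isAccessInvariant(iv, index))
      return false;
  return true;
}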
/// Given:
///   1. an induction variable `iv` of type AffineForOp;
///   2. a `memoryOp` of type AffineLoadOp or AffineStoreOp;
/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
/// is defined as either invariant or varying only along a unique MemRef dim.
/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
/// convey the memRef access is invariant along `iv`).
///
/// Prerequisites:
///   1. `memRefDim` != nullptr;
///   2. `iv` of the proper type;
///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
///      identity layout map.
///
/// Currently only supports no layoutMap or identity layoutMap in the MemRef.
/// Returns false if the MemRef has a non-identity layoutMap or more than 1
/// layoutMap. This is conservative.
///
// TODO(ntv): check strides.
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value *iv, LoadOrStoreOp memoryOp,
                               int *memRefDim) {
  static_assert(std::is_same<LoadOrStoreOp, AffineLoadOp>::value ||
                    std::is_same<LoadOrStoreOp, AffineStoreOp>::value,
                "Must be called on either AffineLoadOp or AffineStoreOp");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  auto layoutMap = memRefType.getAffineMaps();
  // TODO(ntv): remove dependence on Builder once we support non-identity
  // layout map.
  Builder b(memoryOp.getContext());
  if (layoutMap.size() >= 2 ||
      (layoutMap.size() == 1 &&
       !(layoutMap[0] ==
         b.getMultiDimIdentityMap(layoutMap[0].getNumDims())))) {
    return memoryOp.emitError("NYI: non-trivial layoutMap"), false;
  }

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value *, 4> mapOperands(memoryOp.getIndices());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value *, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (auto *exprOperand : exprOperands) {
      if (!isAccessInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template <typename LoadOrStoreOpPointer>
static bool isVectorElement(LoadOrStoreOpPointer memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return memRefType.getElementType().template isa<VectorType>();
}

static bool isVectorTransferReadOrWrite(Operation &op) {
  return isa<vector::VectorTransferReadOp>(op) ||
         isa<vector::VectorTransferWriteOp>(op);
}
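// A minimal usage sketch (illustrative only; `loadOp` is assumed to be an
// affine.load nested under `forOp`): per the convention implemented above,
// `memRefDim == 0` corresponds to the innermost memref dimension, so this
// predicate asks whether the access varies only along that fastest-varying
// dimension.
LLVM_ATTRIBUTE_UNUSED static bool
variesAlongInnermostDim(AffineForOp forOp, AffineLoadOp loadOp) {
  int memRefDim = -1;
  if (!isContiguousAccess(forOp.getInductionVar(), loadOp, &memRefDim))
    return false;
  return memRefDim == 0;
}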
using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 VectorizableOpFun isVectorizableOp) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 &&
           !(isa<AffineIfOp>(op) || isa<AffineForOp>(op));
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  auto vectorTransfers = matcher::Op(isVectorTransferReadOrWrite);
  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransfers.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO(ntv): ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim) {
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    return load ? isContiguousAccess(loop.getInductionVar(), load, memRefDim)
                : isContiguousAccess(loop.getInductionVar(), store, memRefDim);
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun);
}

bool mlir::isVectorizableLoopBody(AffineForOp loop) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr);
}
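// A minimal usage sketch (illustrative only; `width` is a caller-chosen,
// non-zero vector width): combine the vectorizability check with the largest
// known trip-count divisor to decide whether a loop could be vectorized by
// `width` without needing a scalar epilogue loop.
LLVM_ATTRIBUTE_UNUSED static bool
isVectorizableByWidth(AffineForOp loop, uint64_t width) {
  int memRefDim = -1;
  if (!isVectorizableLoopBody(loop, &memRefDim))
    return false;
  // A trip count divisible by the vector width leaves no remainder iterations.
  return getLargestDivisorOfTripCount(loop) % width == 0;
}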
/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO(mlir-team): extend this to check for memory-based dependence violation
// when we have the support.
bool mlir::isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (auto it : llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value *result = op.getResult(i);
      for (auto *user : result->getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancInst = forBody->findAncestorInstInBlock(*user)) {
          assert(forBodyShift.count(ancInst) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancInst])
            return false;
        }
      }
    }
  }
  return true;
}
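// A minimal usage sketch (illustrative only; nothing in this file calls this
// helper): a shift vector of all zeros keeps every operation in its original
// position and is therefore always valid, which is the trivial starting point
// a caller could refine with non-zero per-operation shifts.
LLVM_ATTRIBUTE_UNUSED static bool hasTriviallyValidShifts(AffineForOp forOp) {
  SmallVector<uint64_t, 8> zeroShifts(forOp.getBody()->getOperations().size(),
                                      0);
  return isInstwiseShiftValid(forOp, zeroShifts);
}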