github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/lib/Analysis/LoopAnalysis.cpp

//===- LoopAnalysis.cpp - Misc loop analysis routines --------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements miscellaneous loop analysis routines.
//
//===----------------------------------------------------------------------===//

#include "mlir/Analysis/LoopAnalysis.h"

#include "mlir/Analysis/AffineAnalysis.h"
#include "mlir/Analysis/AffineStructures.h"
#include "mlir/Analysis/NestedMatcher.h"
#include "mlir/Analysis/VectorAnalysis.h"
#include "mlir/Dialect/AffineOps/AffineOps.h"
#include "mlir/Dialect/StandardOps/Ops.h"
#include "mlir/Dialect/VectorOps/VectorOps.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Operation.h"
#include "mlir/Support/Functional.h"
#include "mlir/Support/MathExtras.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallString.h"
#include <type_traits>

using namespace mlir;

/// Returns the trip count of the loop as an affine expression if the latter is
/// expressible as an affine expression, and nullptr otherwise. The trip count
/// expression is simplified before returning. This method only utilizes map
/// composition to construct lower and upper bounds before computing the trip
/// count expressions.
// TODO(mlir-team): this should be moved into 'Transforms/' and be replaced by a
// pure analysis method relying on FlatAffineConstraints; the latter will also
// be more powerful (since both inequalities and equalities will be considered).
void mlir::buildTripCountMapAndOperands(
    AffineForOp forOp, AffineMap *map,
    SmallVectorImpl<Value *> *tripCountOperands) {
  int64_t loopSpan;

  int64_t step = forOp.getStep();
  OpBuilder b(forOp.getOperation());

  if (forOp.hasConstantBounds()) {
    int64_t lb = forOp.getConstantLowerBound();
    int64_t ub = forOp.getConstantUpperBound();
    loopSpan = ub - lb;
    if (loopSpan < 0)
      loopSpan = 0;
    *map = b.getConstantAffineMap(ceilDiv(loopSpan, step));
    tripCountOperands->clear();
    return;
  }
  auto lbMap = forOp.getLowerBoundMap();
  auto ubMap = forOp.getUpperBoundMap();
  if (lbMap.getNumResults() != 1) {
    *map = AffineMap();
    return;
  }
  SmallVector<Value *, 4> lbOperands(forOp.getLowerBoundOperands());
  SmallVector<Value *, 4> ubOperands(forOp.getUpperBoundOperands());
  auto lb = b.create<AffineApplyOp>(forOp.getLoc(), lbMap, lbOperands);
  SmallVector<Value *, 4> ubs;
  ubs.reserve(ubMap.getNumResults());
  for (auto ubExpr : ubMap.getResults())
    ubs.push_back(b.create<AffineApplyOp>(
        forOp.getLoc(),
        b.getAffineMap(ubMap.getNumDims(), ubMap.getNumSymbols(), {ubExpr}),
        ubOperands));

  tripCountOperands->clear();
  tripCountOperands->reserve(1 + ubs.size());
  tripCountOperands->push_back(lb);
  tripCountOperands->append(ubs.begin(), ubs.end());

  SmallVector<AffineExpr, 4> tripCountExprs(ubs.size());
  for (unsigned i = 0, e = ubs.size(); i < e; i++)
    tripCountExprs[i] =
        (b.getAffineDimExpr(1 + i) - b.getAffineDimExpr(0)).ceilDiv(step);
  *map = b.getAffineMap(1 + ubs.size(), 0, tripCountExprs);

  fullyComposeAffineMapAndOperands(map, tripCountOperands);
  *map = simplifyAffineMap(*map);
  canonicalizeMapAndOperands(map, tripCountOperands);
  // Remove any affine.apply's that became dead as a result of composition,
  // simplification, and canonicalization above.
  for (auto *v : ubs)
    if (v->use_empty())
      v->getDefiningOp()->erase();
  if (lb.use_empty())
    lb.erase();
}

/// Returns the trip count of the loop if it's a constant, None otherwise. This
/// method uses affine expression analysis (in turn using getTripCount) and is
/// able to determine constant trip count in non-trivial cases.
// FIXME(mlir-team): this is really relying on buildTripCountMapAndOperands;
// being an analysis utility, it shouldn't. Replace with a version that just
// works with analysis structures (FlatAffineConstraints) and thus doesn't
// update the IR.
llvm::Optional<uint64_t> mlir::getConstantTripCount(AffineForOp forOp) {
  SmallVector<Value *, 4> operands;
  AffineMap map;
  buildTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return None;

  // Take the min if all trip counts are constant.
  Optional<uint64_t> tripCount;
  for (auto resultExpr : map.getResults()) {
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      if (tripCount.hasValue())
        tripCount = std::min(tripCount.getValue(),
                             static_cast<uint64_t>(constExpr.getValue()));
      else
        tripCount = constExpr.getValue();
    } else
      return None;
  }
  return tripCount;
}
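// A minimal usage sketch (illustrative only; the threshold constant below is
// hypothetical and nothing in this file calls this helper): a caller such as
// an unrolling pass could use getConstantTripCount to decide whether a loop
// is small enough to unroll fully.
LLVM_ATTRIBUTE_UNUSED static bool
isSmallConstantTripCountLoop(AffineForOp forOp) {
  constexpr uint64_t kFullUnrollThreshold = 8;
  // Only loops whose trip count folds to a constant qualify.
  llvm::Optional<uint64_t> tripCount = getConstantTripCount(forOp);
  return tripCount.hasValue() && tripCount.getValue() <= kFullUnrollThreshold;
}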
/// Returns the greatest known integral divisor of the trip count. Affine
/// expression analysis is used (indirectly through getTripCount), and
/// this method is thus able to determine non-trivial divisors.
uint64_t mlir::getLargestDivisorOfTripCount(AffineForOp forOp) {
  SmallVector<Value *, 4> operands;
  AffineMap map;
  buildTripCountMapAndOperands(forOp, &map, &operands);

  if (!map)
    return 1;

  // The largest divisor of the trip count is the GCD of the individual largest
  // divisors.
  assert(map.getNumResults() >= 1 && "expected one or more results");
  Optional<uint64_t> gcd;
  for (auto resultExpr : map.getResults()) {
    uint64_t thisGcd;
    if (auto constExpr = resultExpr.dyn_cast<AffineConstantExpr>()) {
      uint64_t tripCount = constExpr.getValue();
      // 0-iteration loops (greatest divisor is 2^64 - 1).
      if (tripCount == 0)
        thisGcd = std::numeric_limits<uint64_t>::max();
      else
        // The greatest divisor is the trip count.
        thisGcd = tripCount;
    } else {
      // Trip count is not a known constant; return its largest known divisor.
      thisGcd = resultExpr.getLargestKnownDivisor();
    }
    if (gcd.hasValue())
      gcd = llvm::GreatestCommonDivisor64(gcd.getValue(), thisGcd);
    else
      gcd = thisGcd;
  }
  assert(gcd.hasValue() && "value expected per above logic");
  return gcd.getValue();
}

bool mlir::isAccessInvariant(Value *iv, Value *index) {
  assert(isForInductionVar(iv) &&
         "iv must be an AffineForOp induction variable");
  assert(index->getType().isa<IndexType>() && "index must be of IndexType");
  SmallVector<Operation *, 4> affineApplyOps;
  getReachableAffineApplyOps({index}, affineApplyOps);

  if (affineApplyOps.empty()) {
    // Pointer equality test because of Value pointer semantics.
    return index != iv;
  }

  if (affineApplyOps.size() > 1) {
    affineApplyOps[0]->emitRemark(
        "CompositionAffineMapsPass must have been run: there should be at "
        "most one AffineApplyOp, returning false conservatively.");
    return false;
  }

  auto composeOp = cast<AffineApplyOp>(affineApplyOps[0]);
  // We need yet another level of indirection because the `dim` index of the
  // access may not correspond to the `dim` index of composeOp.
  return !(AffineValueMap(composeOp).isFunctionOf(0, iv));
}

llvm::DenseSet<Value *>
mlir::getInvariantAccesses(Value *iv, llvm::ArrayRef<Value *> indices) {
  llvm::DenseSet<Value *> res;
  for (unsigned idx = 0, n = indices.size(); idx < n; ++idx) {
    auto *val = indices[idx];
    if (isAccessInvariant(iv, val)) {
      res.insert(val);
    }
  }
  return res;
}
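// A minimal usage sketch (illustrative only; `loadOp` is assumed to be an
// affine.load nested somewhere under `forOp`): an access is fully
// loop-invariant along the loop's induction variable exactly when every one
// of its indices is invariant, which a caller could use as a hoisting
// heuristic.
LLVM_ATTRIBUTE_UNUSED static bool
isLoadInvariantAlongLoop(AffineForOp forOp, AffineLoadOp loadOp) {
  Value *iv = forOp.getInductionVar();
  SmallVector<Value *, 4> indices(loadOp.getIndices());
  for (auto *index : indices)
    if (!isAccessInvariant(iv, index))
      return false;
  return true;
}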
/// Given:
///   1. an induction variable `iv` of type AffineForOp;
///   2. a `memoryOp` of type AffineLoadOp or AffineStoreOp;
/// determines whether `memoryOp` has a contiguous access along `iv`. Contiguous
/// is defined as either invariant or varying only along a unique MemRef dim.
/// Upon success, the unique MemRef dim is written in `memRefDim` (or -1 to
/// convey the memRef access is invariant along `iv`).
///
/// Prerequisites:
///   1. `memRefDim` != nullptr;
///   2. `iv` of the proper type;
///   3. the MemRef accessed by `memoryOp` has no layout map or at most an
///      identity layout map.
///
/// Currently only supports no layoutMap or identity layoutMap in the MemRef.
/// Returns false if the MemRef has a non-identity layoutMap or more than 1
/// layoutMap. This is conservative.
///
// TODO(ntv): check strides.
template <typename LoadOrStoreOp>
static bool isContiguousAccess(Value *iv, LoadOrStoreOp memoryOp,
                               int *memRefDim) {
  static_assert(std::is_same<LoadOrStoreOp, AffineLoadOp>::value ||
                    std::is_same<LoadOrStoreOp, AffineStoreOp>::value,
                "Must be called on either AffineLoadOp or AffineStoreOp");
  assert(memRefDim && "memRefDim == nullptr");
  auto memRefType = memoryOp.getMemRefType();

  auto layoutMap = memRefType.getAffineMaps();
  // TODO(ntv): remove dependence on Builder once we support non-identity
  // layout map.
  Builder b(memoryOp.getContext());
  if (layoutMap.size() >= 2 ||
      (layoutMap.size() == 1 &&
       !(layoutMap[0] ==
         b.getMultiDimIdentityMap(layoutMap[0].getNumDims())))) {
    return memoryOp.emitError("NYI: non-trivial layoutMap"), false;
  }

  int uniqueVaryingIndexAlongIv = -1;
  auto accessMap = memoryOp.getAffineMap();
  SmallVector<Value *, 4> mapOperands(memoryOp.getIndices());
  unsigned numDims = accessMap.getNumDims();
  for (unsigned i = 0, e = memRefType.getRank(); i < e; ++i) {
    // Gather map operands used in result expr 'i' in 'exprOperands'.
    SmallVector<Value *, 4> exprOperands;
    auto resultExpr = accessMap.getResult(i);
    resultExpr.walk([&](AffineExpr expr) {
      if (auto dimExpr = expr.dyn_cast<AffineDimExpr>())
        exprOperands.push_back(mapOperands[dimExpr.getPosition()]);
      else if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>())
        exprOperands.push_back(mapOperands[numDims + symExpr.getPosition()]);
    });
    // Check access invariance of each operand in 'exprOperands'.
    for (auto *exprOperand : exprOperands) {
      if (!isAccessInvariant(iv, exprOperand)) {
        if (uniqueVaryingIndexAlongIv != -1) {
          // 2+ varying indices -> do not vectorize along iv.
          return false;
        }
        uniqueVaryingIndexAlongIv = i;
      }
    }
  }

  if (uniqueVaryingIndexAlongIv == -1)
    *memRefDim = -1;
  else
    *memRefDim = memRefType.getRank() - (uniqueVaryingIndexAlongIv + 1);
  return true;
}

template <typename LoadOrStoreOpPointer>
static bool isVectorElement(LoadOrStoreOpPointer memoryOp) {
  auto memRefType = memoryOp.getMemRefType();
  return memRefType.getElementType().template isa<VectorType>();
}

static bool isVectorTransferReadOrWrite(Operation &op) {
  return isa<vector::VectorTransferReadOp>(op) ||
         isa<vector::VectorTransferWriteOp>(op);
}
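// A minimal usage sketch (illustrative only; `loadOp` is assumed to be an
// affine.load nested under `forOp`): per the convention implemented above,
// `memRefDim == 0` corresponds to the innermost memref dimension, so this
// predicate asks whether the access varies only along that fastest-varying
// dimension.
LLVM_ATTRIBUTE_UNUSED static bool
variesAlongInnermostDim(AffineForOp forOp, AffineLoadOp loadOp) {
  int memRefDim = -1;
  if (!isContiguousAccess(forOp.getInductionVar(), loadOp, &memRefDim))
    return false;
  return memRefDim == 0;
}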
using VectorizableOpFun = std::function<bool(AffineForOp, Operation &)>;

static bool
isVectorizableLoopBodyWithOpCond(AffineForOp loop,
                                 VectorizableOpFun isVectorizableOp) {
  auto *forOp = loop.getOperation();

  // No vectorization across conditionals for now.
  auto conditionals = matcher::If();
  SmallVector<NestedMatch, 8> conditionalsMatched;
  conditionals.match(forOp, &conditionalsMatched);
  if (!conditionalsMatched.empty()) {
    return false;
  }

  // No vectorization across unknown regions.
  auto regions = matcher::Op([](Operation &op) -> bool {
    return op.getNumRegions() != 0 &&
           !(isa<AffineIfOp>(op) || isa<AffineForOp>(op));
  });
  SmallVector<NestedMatch, 8> regionsMatched;
  regions.match(forOp, &regionsMatched);
  if (!regionsMatched.empty()) {
    return false;
  }

  auto vectorTransfers = matcher::Op(isVectorTransferReadOrWrite);
  SmallVector<NestedMatch, 8> vectorTransfersMatched;
  vectorTransfers.match(forOp, &vectorTransfersMatched);
  if (!vectorTransfersMatched.empty()) {
    return false;
  }

  auto loadAndStores = matcher::Op(matcher::isLoadOrStore);
  SmallVector<NestedMatch, 8> loadAndStoresMatched;
  loadAndStores.match(forOp, &loadAndStoresMatched);
  for (auto ls : loadAndStoresMatched) {
    auto *op = ls.getMatchedOperation();
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    // Only scalar types are considered vectorizable; all loads/stores must be
    // vectorizable for a loop to qualify as vectorizable.
    // TODO(ntv): ponder whether we want to be more general here.
    bool vector = load ? isVectorElement(load) : isVectorElement(store);
    if (vector) {
      return false;
    }
    if (isVectorizableOp && !isVectorizableOp(loop, *op)) {
      return false;
    }
  }
  return true;
}

bool mlir::isVectorizableLoopBody(AffineForOp loop, int *memRefDim) {
  VectorizableOpFun fun([memRefDim](AffineForOp loop, Operation &op) {
    auto load = dyn_cast<AffineLoadOp>(op);
    auto store = dyn_cast<AffineStoreOp>(op);
    return load ? isContiguousAccess(loop.getInductionVar(), load, memRefDim)
                : isContiguousAccess(loop.getInductionVar(), store, memRefDim);
  });
  return isVectorizableLoopBodyWithOpCond(loop, fun);
}

bool mlir::isVectorizableLoopBody(AffineForOp loop) {
  return isVectorizableLoopBodyWithOpCond(loop, nullptr);
}
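// A minimal usage sketch (illustrative only; `width` is a caller-chosen,
// non-zero vector width): combine the vectorizability check with the largest
// known trip-count divisor to decide whether a loop could be vectorized by
// `width` without needing a scalar epilogue loop.
LLVM_ATTRIBUTE_UNUSED static bool
isVectorizableByWidth(AffineForOp loop, uint64_t width) {
  int memRefDim = -1;
  if (!isVectorizableLoopBody(loop, &memRefDim))
    return false;
  // A trip count divisible by the vector width leaves no remainder iterations.
  return getLargestDivisorOfTripCount(loop) % width == 0;
}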
/// Checks whether SSA dominance would be violated if a for op's body
/// operations are shifted by the specified shifts. This method checks if a
/// 'def' and all its uses have the same shift factor.
// TODO(mlir-team): extend this to check for memory-based dependence violation
// when we have the support.
bool mlir::isInstwiseShiftValid(AffineForOp forOp, ArrayRef<uint64_t> shifts) {
  auto *forBody = forOp.getBody();
  assert(shifts.size() == forBody->getOperations().size());

  // Work backwards over the body of the block so that the shift of a use's
  // ancestor operation in the block gets recorded before it's looked up.
  DenseMap<Operation *, uint64_t> forBodyShift;
  for (auto it : llvm::enumerate(llvm::reverse(forBody->getOperations()))) {
    auto &op = it.value();

    // Get the index of the current operation; note that we are iterating in
    // reverse, so we need to fix it up.
    size_t index = shifts.size() - it.index() - 1;

    // Remember the shift of this operation.
    uint64_t shift = shifts[index];
    forBodyShift.try_emplace(&op, shift);

    // Validate the results of this operation if it were to be shifted.
    for (unsigned i = 0, e = op.getNumResults(); i < e; ++i) {
      Value *result = op.getResult(i);
      for (auto *user : result->getUsers()) {
        // If an ancestor operation doesn't lie in the block of forOp,
        // there is no shift to check.
        if (auto *ancInst = forBody->findAncestorInstInBlock(*user)) {
          assert(forBodyShift.count(ancInst) > 0 && "ancestor expected in map");
          if (shift != forBodyShift[ancInst])
            return false;
        }
      }
    }
  }
  return true;
}
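// A minimal usage sketch (illustrative only; nothing in this file calls this
// helper): a shift vector of all zeros keeps every operation in its original
// position and is therefore always valid, which is the trivial starting point
// a caller could refine with non-zero per-operation shifts.
LLVM_ATTRIBUTE_UNUSED static bool hasTriviallyValidShifts(AffineForOp forOp) {
  SmallVector<uint64_t, 8> zeroShifts(forOp.getBody()->getOperations().size(),
                                      0);
  return isInstwiseShiftValid(forOp, zeroShifts);
}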