github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp (about) 1 //===- LoopsToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===// 2 // 3 // Copyright 2019 The MLIR Authors. 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // ============================================================================= 17 18 #include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h" 19 #include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h" 20 #include "mlir/Dialect/AffineOps/AffineOps.h" 21 #include "mlir/Dialect/LoopOps/LoopOps.h" 22 #include "mlir/Pass/Pass.h" 23 24 #include "llvm/Support/CommandLine.h" 25 26 #define PASS_NAME "convert-loops-to-gpu" 27 28 using namespace mlir; 29 using namespace mlir::loop; 30 31 static llvm::cl::OptionCategory clOptionsCategory(PASS_NAME " options"); 32 static llvm::cl::opt<unsigned> 33 clNumBlockDims("gpu-block-dims", 34 llvm::cl::desc("Number of GPU block dimensions for mapping"), 35 llvm::cl::cat(clOptionsCategory), llvm::cl::init(1u)); 36 static llvm::cl::opt<unsigned> clNumThreadDims( 37 "gpu-thread-dims", 38 llvm::cl::desc("Number of GPU thread dimensions for mapping"), 39 llvm::cl::cat(clOptionsCategory), llvm::cl::init(1u)); 40 41 namespace { 42 // A pass that traverses top-level loops in the function and converts them to 43 // GPU launch operations. Nested launches are not allowed, so this does not 44 // walk the function recursively to avoid considering nested loops. 45 struct ForLoopMapper : public FunctionPass<ForLoopMapper> { 46 ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims) 47 : numBlockDims(numBlockDims), numThreadDims(numThreadDims) {} 48 49 void runOnFunction() override { 50 for (Block &block : getFunction()) 51 for (Operation &op : llvm::make_early_inc_range(block)) { 52 if (auto forOp = dyn_cast<AffineForOp>(&op)) { 53 if (failed(convertAffineLoopNestToGPULaunch(forOp, numBlockDims, 54 numThreadDims))) 55 signalPassFailure(); 56 } else if (auto forOp = dyn_cast<ForOp>(&op)) { 57 if (failed(convertLoopNestToGPULaunch(forOp, numBlockDims, 58 numThreadDims))) 59 signalPassFailure(); 60 } 61 } 62 } 63 64 unsigned numBlockDims; 65 unsigned numThreadDims; 66 }; 67 } // namespace 68 69 std::unique_ptr<FunctionPassBase> 70 mlir::createSimpleLoopsToGPUPass(unsigned numBlockDims, 71 unsigned numThreadDims) { 72 return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims); 73 } 74 75 static PassRegistration<ForLoopMapper> 76 registration(PASS_NAME, "Convert top-level loops to GPU kernels", [] { 77 return std::make_unique<ForLoopMapper>(clNumBlockDims.getValue(), 78 clNumThreadDims.getValue()); 79 });