github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp (about) 1 //===- KernelOutlining.cpp - Implementation of GPU kernel outling ---------===// 2 // 3 // Copyright 2019 The MLIR Authors. 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // ============================================================================= 17 // 18 // This file implements the GPU dialect kernel outlining pass. 19 // 20 //===----------------------------------------------------------------------===// 21 22 #include "mlir/Dialect/GPU/GPUDialect.h" 23 #include "mlir/Dialect/GPU/Passes.h" 24 #include "mlir/Dialect/StandardOps/Ops.h" 25 #include "mlir/IR/BlockAndValueMapping.h" 26 #include "mlir/IR/Builders.h" 27 #include "mlir/Pass/Pass.h" 28 29 using namespace mlir; 30 31 template <typename OpTy> 32 static void createForAllDimensions(OpBuilder &builder, Location loc, 33 SmallVectorImpl<Value *> &values) { 34 for (StringRef dim : {"x", "y", "z"}) { 35 Value *v = builder.create<OpTy>(loc, builder.getIndexType(), 36 builder.getStringAttr(dim)); 37 values.push_back(v); 38 } 39 } 40 41 // Add operations generating block/thread ids and gird/block dimensions at the 42 // beginning of `kernelFunc` and replace uses of the respective function args. 43 static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) { 44 OpBuilder OpBuilder(kernelFunc.getBody()); 45 SmallVector<Value *, 12> indexOps; 46 createForAllDimensions<gpu::BlockId>(OpBuilder, loc, indexOps); 47 createForAllDimensions<gpu::ThreadId>(OpBuilder, loc, indexOps); 48 createForAllDimensions<gpu::GridDim>(OpBuilder, loc, indexOps); 49 createForAllDimensions<gpu::BlockDim>(OpBuilder, loc, indexOps); 50 // Replace the leading 12 function args with the respective thread/block index 51 // operations. Iterate backwards since args are erased and indices change. 52 for (int i = 11; i >= 0; --i) { 53 auto &firstBlock = kernelFunc.front(); 54 firstBlock.getArgument(i)->replaceAllUsesWith(indexOps[i]); 55 firstBlock.eraseArgument(i); 56 } 57 } 58 59 // Outline the `gpu.launch` operation body into a kernel function. Replace 60 // `gpu.return` operations by `std.return` in the generated functions. 61 static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) { 62 Location loc = launchOp.getLoc(); 63 SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes()); 64 FunctionType type = 65 FunctionType::get(kernelOperandTypes, {}, launchOp.getContext()); 66 std::string kernelFuncName = 67 Twine(launchOp.getParentOfType<FuncOp>().getName(), "_kernel").str(); 68 FuncOp outlinedFunc = FuncOp::create(loc, kernelFuncName, type); 69 outlinedFunc.getBody().takeBody(launchOp.getBody()); 70 Builder builder(launchOp.getContext()); 71 outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(), 72 builder.getUnitAttr()); 73 injectGpuIndexOperations(loc, outlinedFunc); 74 outlinedFunc.walk([](mlir::gpu::Return op) { 75 OpBuilder replacer(op); 76 replacer.create<ReturnOp>(op.getLoc()); 77 op.erase(); 78 }); 79 return outlinedFunc; 80 } 81 82 // Replace `gpu.launch` operations with an `gpu.launch_func` operation launching 83 // `kernelFunc`. 84 static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) { 85 OpBuilder builder(launchOp); 86 SmallVector<Value *, 4> kernelOperandValues( 87 launchOp.getKernelOperandValues()); 88 builder.create<gpu::LaunchFuncOp>( 89 launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(), 90 launchOp.getBlockSizeOperandValues(), kernelOperandValues); 91 launchOp.erase(); 92 } 93 94 namespace { 95 96 class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> { 97 public: 98 void runOnModule() override { 99 ModuleManager moduleManager(getModule()); 100 for (auto func : getModule().getOps<FuncOp>()) { 101 func.walk([&](mlir::gpu::LaunchOp op) { 102 FuncOp outlinedFunc = outlineKernelFunc(op); 103 moduleManager.insert(outlinedFunc); 104 convertToLaunchFuncOp(op, outlinedFunc); 105 }); 106 } 107 } 108 }; 109 110 } // namespace 111 112 std::unique_ptr<ModulePassBase> mlir::createGpuKernelOutliningPass() { 113 return std::make_unique<GpuKernelOutliningPass>(); 114 } 115 116 static PassRegistration<GpuKernelOutliningPass> 117 pass("gpu-kernel-outlining", 118 "Outline gpu.launch bodies to kernel functions.");