github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp (about)

     1  //===- KernelOutlining.cpp - Implementation of GPU kernel outlining -------===//
     2  //
     3  // Copyright 2019 The MLIR Authors.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //   http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  // =============================================================================
    17  //
    18  // This file implements the GPU dialect kernel outlining pass.
    19  //
    20  //===----------------------------------------------------------------------===//
    21  
    22  #include "mlir/Dialect/GPU/GPUDialect.h"
    23  #include "mlir/Dialect/GPU/Passes.h"
    24  #include "mlir/Dialect/StandardOps/Ops.h"
    25  #include "mlir/IR/BlockAndValueMapping.h"
    26  #include "mlir/IR/Builders.h"
    27  #include "mlir/Pass/Pass.h"
    28  
    29  using namespace mlir;
    30  
    31  template <typename OpTy>
    32  static void createForAllDimensions(OpBuilder &builder, Location loc,
    33                                     SmallVectorImpl<Value *> &values) {
    34    for (StringRef dim : {"x", "y", "z"}) {
    35      Value *v = builder.create<OpTy>(loc, builder.getIndexType(),
    36                                      builder.getStringAttr(dim));
    37      values.push_back(v);
    38    }
    39  }
    40  
    41  // Add operations generating block/thread ids and gird/block dimensions at the
    42  // beginning of `kernelFunc` and replace uses of the respective function args.
    43  static void injectGpuIndexOperations(Location loc, FuncOp kernelFunc) {
    44    OpBuilder OpBuilder(kernelFunc.getBody());
    45    SmallVector<Value *, 12> indexOps;
    46    createForAllDimensions<gpu::BlockId>(OpBuilder, loc, indexOps);
    47    createForAllDimensions<gpu::ThreadId>(OpBuilder, loc, indexOps);
    48    createForAllDimensions<gpu::GridDim>(OpBuilder, loc, indexOps);
    49    createForAllDimensions<gpu::BlockDim>(OpBuilder, loc, indexOps);
    50    // Replace the leading 12 function args with the respective thread/block index
    51    // operations. Iterate backwards since args are erased and indices change.
    52    for (int i = 11; i >= 0; --i) {
    53      auto &firstBlock = kernelFunc.front();
    54      firstBlock.getArgument(i)->replaceAllUsesWith(indexOps[i]);
    55      firstBlock.eraseArgument(i);
    56    }
    57  }
    58  
    59  // Outline the `gpu.launch` operation body into a kernel function. Replace
    60  // `gpu.return` operations by `std.return` in the generated functions.
    61  static FuncOp outlineKernelFunc(gpu::LaunchOp launchOp) {
    62    Location loc = launchOp.getLoc();
    63    SmallVector<Type, 4> kernelOperandTypes(launchOp.getKernelOperandTypes());
    64    FunctionType type =
    65        FunctionType::get(kernelOperandTypes, {}, launchOp.getContext());
    66    std::string kernelFuncName =
    67        Twine(launchOp.getParentOfType<FuncOp>().getName(), "_kernel").str();
    68    FuncOp outlinedFunc = FuncOp::create(loc, kernelFuncName, type);
    69    outlinedFunc.getBody().takeBody(launchOp.getBody());
    70    Builder builder(launchOp.getContext());
    71    outlinedFunc.setAttr(gpu::GPUDialect::getKernelFuncAttrName(),
    72                         builder.getUnitAttr());
    73    injectGpuIndexOperations(loc, outlinedFunc);
    74    outlinedFunc.walk([](mlir::gpu::Return op) {
    75      OpBuilder replacer(op);
    76      replacer.create<ReturnOp>(op.getLoc());
    77      op.erase();
    78    });
    79    return outlinedFunc;
    80  }
    81  
    82  // Replace `gpu.launch` operations with an `gpu.launch_func` operation launching
    83  // `kernelFunc`.
    84  static void convertToLaunchFuncOp(gpu::LaunchOp &launchOp, FuncOp kernelFunc) {
    85    OpBuilder builder(launchOp);
    86    SmallVector<Value *, 4> kernelOperandValues(
    87        launchOp.getKernelOperandValues());
    88    builder.create<gpu::LaunchFuncOp>(
    89        launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
    90        launchOp.getBlockSizeOperandValues(), kernelOperandValues);
    91    launchOp.erase();
    92  }
    93  
    94  namespace {
    95  
    96  class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> {
    97  public:
    98    void runOnModule() override {
    99      ModuleManager moduleManager(getModule());
   100      for (auto func : getModule().getOps<FuncOp>()) {
   101        func.walk([&](mlir::gpu::LaunchOp op) {
   102          FuncOp outlinedFunc = outlineKernelFunc(op);
   103          moduleManager.insert(outlinedFunc);
   104          convertToLaunchFuncOp(op, outlinedFunc);
   105        });
   106      }
   107    }
   108  };
   109  
   110  } // namespace
   111  
   112  std::unique_ptr<ModulePassBase> mlir::createGpuKernelOutliningPass() {
   113    return std::make_unique<GpuKernelOutliningPass>();
   114  }
   115  
   116  static PassRegistration<GpuKernelOutliningPass>
   117      pass("gpu-kernel-outlining",
   118           "Outline gpu.launch bodies to kernel functions.");