github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/lib/Conversion/LoopsToGPU/LoopsToGPUPass.cpp (about)

     1  //===- LoopsToGPUPass.cpp - Convert a loop nest to a GPU kernel -----------===//
     2  //
     3  // Copyright 2019 The MLIR Authors.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //   http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  // =============================================================================
    17  
    18  #include "mlir/Conversion/LoopsToGPU/LoopsToGPUPass.h"
    19  #include "mlir/Conversion/LoopsToGPU/LoopsToGPU.h"
    20  #include "mlir/Dialect/AffineOps/AffineOps.h"
    21  #include "mlir/Dialect/LoopOps/LoopOps.h"
    22  #include "mlir/Pass/Pass.h"
    23  
    24  #include "llvm/Support/CommandLine.h"
    25  
    26  #define PASS_NAME "convert-loops-to-gpu"
    27  
    28  using namespace mlir;
    29  using namespace mlir::loop;
    30  
    31  static llvm::cl::OptionCategory clOptionsCategory(PASS_NAME " options");
    32  // Number of GPU block dimensions used for mapping; initial value is 1.
    33  static llvm::cl::opt<unsigned> clNumBlockDims(
    34      "gpu-block-dims", llvm::cl::init(1u), llvm::cl::cat(clOptionsCategory),
    35      llvm::cl::desc("Number of GPU block dimensions for mapping"));
    36  // Number of GPU thread dimensions used for mapping; initial value is 1.
    37  static llvm::cl::opt<unsigned> clNumThreadDims(
    38      "gpu-thread-dims", llvm::cl::init(1u), llvm::cl::cat(clOptionsCategory),
    39      llvm::cl::desc("Number of GPU thread dimensions for mapping"));
    40  
    41  namespace {
    42  // Function pass that rewrites loops into GPU launch operations.  Only ops that
    43  // sit directly in the function's blocks are inspected; the body of a loop is
    44  // never searched, so a nest cannot produce a launch inside another launch.
    45  struct ForLoopMapper : public FunctionPass<ForLoopMapper> {
    46    ForLoopMapper(unsigned numBlockDims, unsigned numThreadDims)
    47        : numBlockDims(numBlockDims), numThreadDims(numThreadDims) {}
    48  
    49    void runOnFunction() override {
    50      for (Block &body : getFunction())
    51        for (Operation &candidate : llvm::make_early_inc_range(body)) {
    52          LogicalResult status = success(); // Stays success for non-loop ops.
    53          if (auto affineLoop = dyn_cast<AffineForOp>(&candidate))
    54            status = convertAffineLoopNestToGPULaunch(affineLoop, numBlockDims,
    55                                                      numThreadDims);
    56          else if (auto plainLoop = dyn_cast<ForOp>(&candidate))
    57            status = convertLoopNestToGPULaunch(plainLoop, numBlockDims,
    58                                                numThreadDims);
    59          if (failed(status))
    60            signalPassFailure();
    61        }
    62    }
    63    // Numbers of GPU block / thread dimensions handed to the loop converters.
    64    unsigned numBlockDims;
    65    unsigned numThreadDims;
    66  };
    67  } // namespace
    68  
    69  // Builds a ForLoopMapper pass configured with the given dimension counts.
    70  std::unique_ptr<FunctionPassBase> mlir::createSimpleLoopsToGPUPass(
    71      unsigned numBlockDims, unsigned numThreadDims) {
    72    return std::make_unique<ForLoopMapper>(numBlockDims, numThreadDims);
    73  }
    74  
    75  // Registers the pass as PASS_NAME; the factory reads the command-line options.
    76  static PassRegistration<ForLoopMapper> registration(
    77      PASS_NAME, "Convert top-level loops to GPU kernels", [] {
    78        return std::make_unique<ForLoopMapper>(clNumBlockDims, clNumThreadDims);
    79      });