github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp

//===- mlir-cuda-runner.cpp - MLIR CUDA Execution Driver ------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This is a command line utility that executes an MLIR file on the GPU by
// translating MLIR to NVVM/LLVM IR before JIT-compiling and executing the
// latter.
//
//===----------------------------------------------------------------------===//

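// Illustrative invocation (a sketch, not prescribed by this file): the flags
// below come from JitRunnerMain's command-line options, and the runtime
// wrapper library name/path is an assumption made for the example only.
//
//   mlir-cuda-runner kernel.mlir \
//     --shared-libs=libcuda-runtime-wrappers.so --entry-point-result=void
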
#include "llvm/ADT/STLExtras.h"

#include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Module.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/JitRunner.h"
#include "mlir/Transforms/DialectConversion.h"

#include "cuda.h"

using namespace mlir;

inline void emit_cuda_error(const llvm::Twine &message, const char *buffer,
                            CUresult error, FuncOp &function) {
  function.emitError(message.concat(" failed with error code ")
                         .concat(llvm::Twine{error})
                         .concat("[")
                         .concat(buffer)
                         .concat("]"));
}

#define RETURN_ON_CUDA_ERROR(expr, msg)                                        \
  {                                                                            \
    auto _cuda_error = (expr);                                                 \
    if (_cuda_error != CUDA_SUCCESS) {                                         \
      emit_cuda_error(msg, jitErrorBuffer, _cuda_error, function);             \
      return {};                                                               \
    }                                                                          \
  }

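// Callback handed to createConvertGPUKernelToCubinPass below: it drives the
// CUDA driver's JIT linker to turn the PTX generated for a kernel function
// into a cubin blob returned to the caller.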
OwnedCubin compilePtxToCubin(const std::string ptx, FuncOp &function) {
  char jitErrorBuffer[4096] = {0};

  RETURN_ON_CUDA_ERROR(cuInit(0), "cuInit");

  // Linking requires a device context.
  CUdevice device;
  RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0), "cuDeviceGet");
  CUcontext context;
  RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device), "cuCtxCreate");
  CUlinkState linkState;

  CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
                               CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
  void *jitOptionsVals[] = {jitErrorBuffer,
                            reinterpret_cast<void *>(sizeof(jitErrorBuffer))};

  RETURN_ON_CUDA_ERROR(cuLinkCreate(2,              /* number of jit options */
                                    jitOptions,     /* jit options */
                                    jitOptionsVals, /* jit option values */
                                    &linkState),
                       "cuLinkCreate");

  RETURN_ON_CUDA_ERROR(
      cuLinkAddData(linkState, CUjitInputType::CU_JIT_INPUT_PTX,
                    const_cast<void *>(static_cast<const void *>(ptx.c_str())),
                    ptx.length(), function.getName().data(), /* kernel name */
                    0,       /* number of jit options */
                    nullptr, /* jit options */
                    nullptr  /* jit option values */
                    ),
      "cuLinkAddData");

  void *cubinData;
  size_t cubinSize;
  RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize),
                       "cuLinkComplete");

  char *cubinAsChar = static_cast<char *>(cubinData);
  OwnedCubin result =
      std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);

  // This will also destroy the cubin data.
  RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState), "cuLinkDestroy");

  return result;
}

namespace {
// A pass that lowers all Standard and GPU operations to the LLVM dialect. It
// does not lower the GPULaunch operation to actual code but does translate
// the signature of its kernel argument.
class LowerStandardAndGpuToLLVMAndNVVM
    : public ModulePass<LowerStandardAndGpuToLLVMAndNVVM> {
public:
  void runOnModule() override {
    ModuleOp m = getModule();

    OwningRewritePatternList patterns;
    LLVMTypeConverter converter(m.getContext());
    populateStdToLLVMConversionPatterns(converter, patterns);
    populateGpuToNVVMConversionPatterns(converter, patterns);

    ConversionTarget target(getContext());
    target.addLegalDialect<LLVM::LLVMDialect>();
    target.addLegalDialect<NVVM::NVVMDialect>();
    target.addLegalOp<ModuleOp>();
    target.addLegalOp<ModuleTerminatorOp>();
    target.addDynamicallyLegalOp<FuncOp>(
        [&](FuncOp op) { return converter.isSignatureLegal(op.getType()); });
    if (failed(applyFullConversion(m, target, patterns, &converter)))
      signalPassFailure();
  }
};
} // end anonymous namespace

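// Lowering pipeline applied before JIT execution: outline GPU kernels into
// standalone functions, lower Standard/GPU ops to the LLVM/NVVM dialects,
// compile each kernel to a cubin via compilePtxToCubin, generate accessors
// for the embedded cubins, and rewrite GPU launches into CUDA runtime calls.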
static LogicalResult runMLIRPasses(ModuleOp m) {
  PassManager pm;

  pm.addPass(createGpuKernelOutliningPass());
  pm.addPass(static_cast<std::unique_ptr<ModulePassBase>>(
      std::make_unique<LowerStandardAndGpuToLLVMAndNVVM>()));
  pm.addPass(createConvertGPUKernelToCubinPass(&compilePtxToCubin));
  pm.addPass(createGenerateCubinAccessorPass());
  pm.addPass(createConvertGpuLaunchFuncToCudaCallsPass());

  if (failed(pm.run(m)))
    return failure();

  return success();
}

int main(int argc, char **argv) {
  return mlir::JitRunnerMain(argc, argv, &runMLIRPasses);
}