github.com/johnnyeven/libtools@v0.0.0-20191126065708-61829c1adf46/third_party/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
//===- mlir-cpu-runner.cpp - MLIR CPU Execution Driver---------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This is a command line utility that executes an MLIR file on the GPU by
// translating MLIR to NVVM/LLVM IR before JIT-compiling and executing the
// latter.
21 // 22 //===----------------------------------------------------------------------===// 23 24 #include "llvm/ADT/STLExtras.h" 25 26 #include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h" 27 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" 28 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" 29 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" 30 #include "mlir/Dialect/GPU/GPUDialect.h" 31 #include "mlir/Dialect/GPU/Passes.h" 32 #include "mlir/Dialect/LLVMIR/LLVMDialect.h" 33 #include "mlir/Dialect/LLVMIR/NVVMDialect.h" 34 #include "mlir/IR/Function.h" 35 #include "mlir/IR/Module.h" 36 #include "mlir/Pass/Pass.h" 37 #include "mlir/Pass/PassManager.h" 38 #include "mlir/Support/JitRunner.h" 39 #include "mlir/Transforms/DialectConversion.h" 40 41 #include "cuda.h" 42 43 using namespace mlir; 44 45 inline void emit_cuda_error(const llvm::Twine &message, const char *buffer, 46 CUresult error, FuncOp &function) { 47 function.emitError(message.concat(" failed with error code ") 48 .concat(llvm::Twine{error}) 49 .concat("[") 50 .concat(buffer) 51 .concat("]")); 52 } 53 54 #define RETURN_ON_CUDA_ERROR(expr, msg) \ 55 { \ 56 auto _cuda_error = (expr); \ 57 if (_cuda_error != CUDA_SUCCESS) { \ 58 emit_cuda_error(msg, jitErrorBuffer, _cuda_error, function); \ 59 return {}; \ 60 } \ 61 } 62 63 OwnedCubin compilePtxToCubin(const std::string ptx, FuncOp &function) { 64 char jitErrorBuffer[4096] = {0}; 65 66 RETURN_ON_CUDA_ERROR(cuInit(0), "cuInit"); 67 68 // Linking requires a device context. 
69 CUdevice device; 70 RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0), "cuDeviceGet"); 71 CUcontext context; 72 RETURN_ON_CUDA_ERROR(cuCtxCreate(&context, 0, device), "cuCtxCreate"); 73 CUlinkState linkState; 74 75 CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER, 76 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; 77 void *jitOptionsVals[] = {jitErrorBuffer, 78 reinterpret_cast<void *>(sizeof(jitErrorBuffer))}; 79 80 RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */ 81 jitOptions, /* jit options */ 82 jitOptionsVals, /* jit option values */ 83 &linkState), 84 "cuLinkCreate"); 85 86 RETURN_ON_CUDA_ERROR( 87 cuLinkAddData(linkState, CUjitInputType::CU_JIT_INPUT_PTX, 88 const_cast<void *>(static_cast<const void *>(ptx.c_str())), 89 ptx.length(), function.getName().data(), /* kernel name */ 90 0, /* number of jit options */ 91 nullptr, /* jit options */ 92 nullptr /* jit option values */ 93 ), 94 "cuLinkAddData"); 95 96 void *cubinData; 97 size_t cubinSize; 98 RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize), 99 "cuLinkComplete"); 100 101 char *cubinAsChar = static_cast<char *>(cubinData); 102 OwnedCubin result = 103 std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize); 104 105 // This will also destroy the cubin data. 106 RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState), "cuLinkDestroy"); 107 108 return result; 109 } 110 111 namespace { 112 // A pass that lowers all Standard and Gpu operations to LLVM dialect. It does 113 // not lower the GPULaunch operation to actual code but dows translate the 114 // signature of its kernel argument. 
115 class LowerStandardAndGpuToLLVMAndNVVM 116 : public ModulePass<LowerStandardAndGpuToLLVMAndNVVM> { 117 public: 118 void runOnModule() override { 119 ModuleOp m = getModule(); 120 121 OwningRewritePatternList patterns; 122 LLVMTypeConverter converter(m.getContext()); 123 populateStdToLLVMConversionPatterns(converter, patterns); 124 populateGpuToNVVMConversionPatterns(converter, patterns); 125 126 ConversionTarget target(getContext()); 127 target.addLegalDialect<LLVM::LLVMDialect>(); 128 target.addLegalDialect<NVVM::NVVMDialect>(); 129 target.addLegalOp<ModuleOp>(); 130 target.addLegalOp<ModuleTerminatorOp>(); 131 target.addDynamicallyLegalOp<FuncOp>( 132 [&](FuncOp op) { return converter.isSignatureLegal(op.getType()); }); 133 if (failed(applyFullConversion(m, target, patterns, &converter))) 134 signalPassFailure(); 135 } 136 }; 137 } // end anonymous namespace 138 139 static LogicalResult runMLIRPasses(ModuleOp m) { 140 PassManager pm; 141 142 pm.addPass(createGpuKernelOutliningPass()); 143 pm.addPass(static_cast<std::unique_ptr<ModulePassBase>>( 144 std::make_unique<LowerStandardAndGpuToLLVMAndNVVM>())); 145 pm.addPass(createConvertGPUKernelToCubinPass(&compilePtxToCubin)); 146 pm.addPass(createGenerateCubinAccessorPass()); 147 pm.addPass(createConvertGpuLaunchFuncToCudaCallsPass()); 148 149 if (failed(pm.run(m))) 150 return failure(); 151 152 return success(); 153 } 154 155 int main(int argc, char **argv) { 156 return mlir::JitRunnerMain(argc, argv, &runMLIRPasses); 157 }