github.com/ttpreport/gvisor-ligolo@v0.0.0-20240123134145-a858404967ba/runsc/specutils/nvidia.go (about) 1 // Copyright 2023 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package specutils 16 17 import ( 18 "fmt" 19 "path/filepath" 20 "regexp" 21 "strconv" 22 "strings" 23 24 specs "github.com/opencontainers/runtime-spec/specs-go" 25 "github.com/ttpreport/gvisor-ligolo/pkg/log" 26 "github.com/ttpreport/gvisor-ligolo/runsc/config" 27 ) 28 29 const nvdEnvVar = "NVIDIA_VISIBLE_DEVICES" 30 31 // GPUFunctionalityRequested returns true if the user intends for the sandbox 32 // to have access to GPU functionality (e.g. access to /dev/nvidiactl), 33 // irrespective of whether or not they want access to any specific GPU. 34 func GPUFunctionalityRequested(spec *specs.Spec, conf *config.Config) bool { 35 if !conf.NVProxy { 36 // nvproxy disabled. 37 return false 38 } 39 if !conf.NVProxyDocker { 40 // nvproxy enabled in non-Docker mode. 41 return true 42 } 43 // nvproxy enabled in Docker mode. 44 // GPU access is only requested if NVIDIA_VISIBLE_DEVICES is non-empty 45 // and set to a value that doesn't mean "no GPU". 46 if spec.Process == nil { 47 return false 48 } 49 nvd, _ := EnvVar(spec.Process.Env, nvdEnvVar) 50 // A value of "none" means "no GPU device, but still access to driver 51 // functionality", so it is not a value we check for here. 52 return nvd != "" && nvd != "void" 53 } 54 55 // CanAccessAtLeastOneGPU returns true if the sandbox and container should 56 // be able to access at least one Nvidia GPU. This is a function of the 57 // sandbox configuration and the container spec's NVIDIA_VISIBLE_DEVICES 58 // environment variable. 59 func CanAccessAtLeastOneGPU(spec *specs.Spec, conf *config.Config) bool { 60 gpus, err := NvidiaDeviceNumbers(spec, conf) 61 if err != nil { 62 log.Warningf("Cannot determine if the container should have access to GPUs: %v", err) 63 return false 64 } 65 return len(gpus) > 0 66 } 67 68 // nvidiaDeviceRegex matches Nvidia GPU device paths. 69 var nvidiaDeviceRegex = regexp.MustCompile(`^/dev/nvidia(\d+)$`) 70 71 // findAllGPUDevices returns the Nvidia GPU device minor numbers of all GPUs 72 // on the machine. 73 func findAllGPUDevices() ([]uint32, error) { 74 paths, err := filepath.Glob("/dev/nvidia*") 75 if err != nil { 76 return nil, fmt.Errorf("enumerating Nvidia device files: %w", err) 77 } 78 var devMinors []uint32 79 for _, path := range paths { 80 if ms := nvidiaDeviceRegex.FindStringSubmatch(path); ms != nil { 81 index, err := strconv.ParseUint(ms[1], 10, 32) 82 if err != nil { 83 return nil, fmt.Errorf("invalid host device file %q: %w", path, err) 84 } 85 devMinors = append(devMinors, uint32(index)) 86 } 87 } 88 return devMinors, nil 89 } 90 91 // NvidiaDeviceNumbers returns the Nvidia GPU device minor numbers that 92 // should be visible to the specified container. 93 // In Docker mode, this is the set of devices specified in 94 // NVIDIA_VISIBLE_DEVICES. 95 // In non-Docker mode, this is all Nvidia devices, as we cannot know the set 96 // of usable GPUs until subcontainer creation. 97 func NvidiaDeviceNumbers(spec *specs.Spec, conf *config.Config) ([]uint32, error) { 98 if !GPUFunctionalityRequested(spec, conf) { 99 return nil, nil 100 } 101 if !conf.NVProxyDocker { 102 // nvproxy enabled in non-Docker mode. 103 // Return all GPUs on the machine. 104 return findAllGPUDevices() 105 } 106 // nvproxy is enabled in Docker mode. 107 nvd, _ := EnvVar(spec.Process.Env, nvdEnvVar) 108 if nvd == "none" { 109 return nil, nil 110 } 111 if nvd == "all" { 112 return findAllGPUDevices() 113 } 114 var devMinors []uint32 115 // Expect nvd to be a list of indices; UUIDs aren't supported 116 // yet. 117 for _, indexStr := range strings.Split(nvd, ",") { 118 index, err := strconv.ParseUint(indexStr, 10, 32) 119 if err != nil { 120 return nil, fmt.Errorf("invalid %q in NVIDIA_VISIBLE_DEVICES %q: %w", indexStr, nvd, err) 121 } 122 devMinors = append(devMinors, uint32(index)) 123 } 124 return devMinors, nil 125 }