github.com/apptainer/singularity@v3.1.1+incompatible/internal/pkg/util/nvidiautils/nvidiautils.go (about) 1 // Copyright (c) 2018, Sylabs Inc. All rights reserved. 2 // This software is licensed under a 3-clause BSD license. Please consult the 3 // LICENSE.md file distributed with the sources of this project regarding your 4 // rights to use or distribute this software. 5 6 package nvidiautils 7 8 import ( 9 "bufio" 10 "bytes" 11 "debug/elf" 12 "fmt" 13 "os" 14 "os/exec" 15 "path/filepath" 16 "regexp" 17 "strings" 18 19 "github.com/sylabs/singularity/internal/pkg/sylog" 20 ) 21 22 // generate bind list using nvidia-container-cli 23 func nvidiaContainerCli() ([]string, error) { 24 var strArray []string 25 26 // use nvidia-container-cli (if present) 27 command, err := exec.LookPath("nvidia-container-cli") 28 if err != nil { 29 return nil, fmt.Errorf("no nvidia-container-cli present: %v", err) 30 } 31 32 cmd := exec.Command(command, "list", "--binaries", "--libraries") 33 out, err := cmd.Output() 34 if err != nil { 35 return nil, fmt.Errorf("Unable to execute nvidia-container-cli: %v", err) 36 } 37 38 reader := bytes.NewReader(out) 39 scanner := bufio.NewScanner(reader) 40 41 for scanner.Scan() { 42 line := strings.TrimSpace(scanner.Text()) 43 44 if line != "" { 45 // if this is a library, then add a .so entry as well 46 if strings.Contains(line, ".so") { 47 fileName := filepath.Base(line) 48 strArray = append(strArray, fileName) // add entry to list to be bound 49 50 // strip off .xxx.xx prefix and add so entry as well 51 newentry := strings.SplitAfter(fileName, ".so") 52 strArray = append(strArray, newentry[0]) // add prefix (filepath.so) 53 } else { 54 // Assume we're a binary and need the full path 55 strArray = append(strArray, line) 56 } 57 } 58 } 59 return strArray, nil 60 } 61 62 // generate bind list using contents of nvliblist.conf 63 func nvidiaLiblist(abspath string) ([]string, error) { 64 var strArray []string 65 66 // grab the entries in nvliblist.conf file 67 file, err := os.Open(abspath + "/nvliblist.conf") 68 if err != nil { 69 return nil, fmt.Errorf("%v", err) 70 } 71 defer file.Close() 72 73 scanner := bufio.NewScanner(file) 74 for scanner.Scan() { 75 line := strings.TrimSpace(scanner.Text()) 76 if !strings.HasPrefix(line, "#") && line != "" { 77 strArray = append(strArray, line) 78 } 79 } 80 return strArray, nil 81 } 82 83 // GetNvidiaPath returns a string array consisting of filepaths of nvidia 84 // related files to be added to the BindPaths 85 func GetNvidiaPath(abspath string, envPath string) (libraries []string, binaries []string, err error) { 86 var strArray []string 87 88 // replace PATH with custom environment variable 89 // and restore it when returning 90 if envPath != "" { 91 oldPath := os.Getenv("PATH") 92 os.Setenv("PATH", envPath) 93 94 defer os.Setenv("PATH", oldPath) 95 } 96 97 // use nvidia-container-cli if present 98 strArray, err = nvidiaContainerCli() 99 if err != nil { 100 sylog.Verbosef("nvidiaContainercli returned: %v", err) 101 sylog.Verbosef("Falling back to nvliblist.conf") 102 103 // nvidia-container-cli not present or errored out 104 // fallback is to use nvliblist.conf 105 strArray, err = nvidiaLiblist(abspath) 106 if err != nil { 107 sylog.Warningf("nvidiaLiblist returned: %v", err) 108 return 109 } 110 } 111 112 // walk thru the ldconfig output and add entries which contain the filenames 113 // returned by nvidia-container-cli OR the nvliblist.conf file contents 114 cmd := exec.Command("ldconfig", "-p") 115 out, err := cmd.Output() 116 if err != nil { 117 sylog.Warningf("ldconfig execution error: %v", err) 118 return 119 } 120 121 // store library name with associated path 122 ldCache := make(map[string]string) 123 124 // store binaries/libraries path 125 bins := make(map[string]string) 126 libs := make(map[string]string) 127 128 // sample ldconfig -p output: 129 // libnvidia-ml.so.1 (libc6,x86-64) => /usr/lib64/nvidia/libnvidia-ml.so.1 130 r, err := regexp.Compile(`(?m)^(.*)\s*\(.*\)\s*=>\s*(.*)$`) 131 if err != nil { 132 return 133 } 134 135 // get elf machine to match correct libraries during ldconfig lookup 136 self, err := elf.Open("/proc/self/exe") 137 if err != nil { 138 return 139 } 140 141 machine := self.Machine 142 self.Close() 143 144 for _, match := range r.FindAllSubmatch(out, -1) { 145 if match != nil { 146 // libName is the "libnvidia-ml.so.1" (from the above example) 147 // libPath is the "/usr/lib64/nvidia/libnvidia-ml.so.1" (from the above example) 148 libName := strings.TrimSpace(string(match[1])) 149 libPath := strings.TrimSpace(string(match[2])) 150 151 ldCache[libPath] = libName 152 } 153 } 154 155 for _, nvidiaFileName := range strArray { 156 // if the file contains a ".so", treat it as a library 157 if strings.Contains(nvidiaFileName, ".so") { 158 for libPath, lib := range ldCache { 159 if strings.HasPrefix(lib, nvidiaFileName) { 160 if _, ok := libs[lib]; !ok { 161 elib, err := elf.Open(libPath) 162 if err != nil { 163 sylog.Debugf("ignore library %s: %s", lib, err) 164 continue 165 } 166 167 if elib.Machine == machine { 168 libs[lib] = libPath 169 libraries = append(libraries, libPath) 170 } 171 172 elib.Close() 173 } 174 } 175 } 176 } else { 177 // treat the file as a binary file - add it to the bind list 178 // no need to check the ldconfig output 179 binary, err := exec.LookPath(nvidiaFileName) 180 if err != nil { 181 continue 182 } 183 if _, ok := bins[binary]; !ok { 184 bins[binary] = binary 185 binaries = append(binaries, binary) 186 } 187 } 188 } 189 190 return 191 }