github.com/apptainer/singularity@v3.1.1+incompatible/internal/pkg/util/nvidiautils/nvidiautils.go (about)

     1  // Copyright (c) 2018, Sylabs Inc. All rights reserved.
     2  // This software is licensed under a 3-clause BSD license. Please consult the
     3  // LICENSE.md file distributed with the sources of this project regarding your
     4  // rights to use or distribute this software.
     5  
     6  package nvidiautils
     7  
     8  import (
     9  	"bufio"
    10  	"bytes"
    11  	"debug/elf"
    12  	"fmt"
    13  	"os"
    14  	"os/exec"
    15  	"path/filepath"
    16  	"regexp"
    17  	"strings"
    18  
    19  	"github.com/sylabs/singularity/internal/pkg/sylog"
    20  )
    21  
    22  // generate bind list using nvidia-container-cli
    23  func nvidiaContainerCli() ([]string, error) {
    24  	var strArray []string
    25  
    26  	// use nvidia-container-cli (if present)
    27  	command, err := exec.LookPath("nvidia-container-cli")
    28  	if err != nil {
    29  		return nil, fmt.Errorf("no nvidia-container-cli present: %v", err)
    30  	}
    31  
    32  	cmd := exec.Command(command, "list", "--binaries", "--libraries")
    33  	out, err := cmd.Output()
    34  	if err != nil {
    35  		return nil, fmt.Errorf("Unable to execute nvidia-container-cli: %v", err)
    36  	}
    37  
    38  	reader := bytes.NewReader(out)
    39  	scanner := bufio.NewScanner(reader)
    40  
    41  	for scanner.Scan() {
    42  		line := strings.TrimSpace(scanner.Text())
    43  
    44  		if line != "" {
    45  			// if this is a library, then add a .so entry as well
    46  			if strings.Contains(line, ".so") {
    47  				fileName := filepath.Base(line)
    48  				strArray = append(strArray, fileName) // add entry to list to be bound
    49  
    50  				// strip off .xxx.xx prefix and add so entry as well
    51  				newentry := strings.SplitAfter(fileName, ".so")
    52  				strArray = append(strArray, newentry[0]) // add prefix (filepath.so)
    53  			} else {
    54  				// Assume we're a binary and need the full path
    55  				strArray = append(strArray, line)
    56  			}
    57  		}
    58  	}
    59  	return strArray, nil
    60  }
    61  
    62  // generate bind list using contents of nvliblist.conf
    63  func nvidiaLiblist(abspath string) ([]string, error) {
    64  	var strArray []string
    65  
    66  	// grab the entries in nvliblist.conf file
    67  	file, err := os.Open(abspath + "/nvliblist.conf")
    68  	if err != nil {
    69  		return nil, fmt.Errorf("%v", err)
    70  	}
    71  	defer file.Close()
    72  
    73  	scanner := bufio.NewScanner(file)
    74  	for scanner.Scan() {
    75  		line := strings.TrimSpace(scanner.Text())
    76  		if !strings.HasPrefix(line, "#") && line != "" {
    77  			strArray = append(strArray, line)
    78  		}
    79  	}
    80  	return strArray, nil
    81  }
    82  
    83  // GetNvidiaPath returns a string array consisting of filepaths of nvidia
    84  // related files to be added to the BindPaths
    85  func GetNvidiaPath(abspath string, envPath string) (libraries []string, binaries []string, err error) {
    86  	var strArray []string
    87  
    88  	// replace PATH with custom environment variable
    89  	// and restore it when returning
    90  	if envPath != "" {
    91  		oldPath := os.Getenv("PATH")
    92  		os.Setenv("PATH", envPath)
    93  
    94  		defer os.Setenv("PATH", oldPath)
    95  	}
    96  
    97  	// use nvidia-container-cli if present
    98  	strArray, err = nvidiaContainerCli()
    99  	if err != nil {
   100  		sylog.Verbosef("nvidiaContainercli returned: %v", err)
   101  		sylog.Verbosef("Falling back to nvliblist.conf")
   102  
   103  		// nvidia-container-cli not present or errored out
   104  		// fallback is to use nvliblist.conf
   105  		strArray, err = nvidiaLiblist(abspath)
   106  		if err != nil {
   107  			sylog.Warningf("nvidiaLiblist returned: %v", err)
   108  			return
   109  		}
   110  	}
   111  
   112  	// walk thru the ldconfig output and add entries which contain the filenames
   113  	// returned by nvidia-container-cli OR the nvliblist.conf file contents
   114  	cmd := exec.Command("ldconfig", "-p")
   115  	out, err := cmd.Output()
   116  	if err != nil {
   117  		sylog.Warningf("ldconfig execution error: %v", err)
   118  		return
   119  	}
   120  
   121  	// store library name with associated path
   122  	ldCache := make(map[string]string)
   123  
   124  	// store binaries/libraries path
   125  	bins := make(map[string]string)
   126  	libs := make(map[string]string)
   127  
   128  	// sample ldconfig -p output:
   129  	//  libnvidia-ml.so.1 (libc6,x86-64) => /usr/lib64/nvidia/libnvidia-ml.so.1
   130  	r, err := regexp.Compile(`(?m)^(.*)\s*\(.*\)\s*=>\s*(.*)$`)
   131  	if err != nil {
   132  		return
   133  	}
   134  
   135  	// get elf machine to match correct libraries during ldconfig lookup
   136  	self, err := elf.Open("/proc/self/exe")
   137  	if err != nil {
   138  		return
   139  	}
   140  
   141  	machine := self.Machine
   142  	self.Close()
   143  
   144  	for _, match := range r.FindAllSubmatch(out, -1) {
   145  		if match != nil {
   146  			// libName is the "libnvidia-ml.so.1" (from the above example)
   147  			// libPath is the "/usr/lib64/nvidia/libnvidia-ml.so.1" (from the above example)
   148  			libName := strings.TrimSpace(string(match[1]))
   149  			libPath := strings.TrimSpace(string(match[2]))
   150  
   151  			ldCache[libPath] = libName
   152  		}
   153  	}
   154  
   155  	for _, nvidiaFileName := range strArray {
   156  		// if the file contains a ".so", treat it as a library
   157  		if strings.Contains(nvidiaFileName, ".so") {
   158  			for libPath, lib := range ldCache {
   159  				if strings.HasPrefix(lib, nvidiaFileName) {
   160  					if _, ok := libs[lib]; !ok {
   161  						elib, err := elf.Open(libPath)
   162  						if err != nil {
   163  							sylog.Debugf("ignore library %s: %s", lib, err)
   164  							continue
   165  						}
   166  
   167  						if elib.Machine == machine {
   168  							libs[lib] = libPath
   169  							libraries = append(libraries, libPath)
   170  						}
   171  
   172  						elib.Close()
   173  					}
   174  				}
   175  			}
   176  		} else {
   177  			// treat the file as a binary file - add it to the bind list
   178  			// no need to check the ldconfig output
   179  			binary, err := exec.LookPath(nvidiaFileName)
   180  			if err != nil {
   181  				continue
   182  			}
   183  			if _, ok := bins[binary]; !ok {
   184  				bins[binary] = binary
   185  				binaries = append(binaries, binary)
   186  			}
   187  		}
   188  	}
   189  
   190  	return
   191  }