github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/fsimpl/sys/pci.go (about)

     1  // Copyright 2023 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sys
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"path"
    21  	regex "regexp"
    22  
    23  	"golang.org/x/sys/unix"
    24  	"github.com/metacubex/gvisor/pkg/abi/linux"
    25  	"github.com/metacubex/gvisor/pkg/context"
    26  	"github.com/metacubex/gvisor/pkg/fsutil"
    27  	"github.com/metacubex/gvisor/pkg/sentry/fsimpl/kernfs"
    28  	"github.com/metacubex/gvisor/pkg/sentry/kernel/auth"
    29  )
    30  
const (
	// pciMainBusDevicePath is the sysfs directory of the main PCI bus.
	// NOTE: newDeviceClassDir and newBusPCIDevicesDir take a parameter with
	// this same name, which shadows the constant inside those methods.
	pciMainBusDevicePath = "/sys/devices/pci0000:00"
	// accelDevice is the "accel" directory name; the comments in this file
	// associate accel with TPU v4 (symlinks under /sys/class/accel).
	accelDevice          = "accel"
	// vfioDevice is the "vfio-dev" directory name; the comments in this file
	// associate vfio with TPU v5 (symlinks under /sys/class/vfio-dev).
	vfioDevice           = "vfio-dev"
)
    36  
    37  var (
    38  	// Matches PCI device addresses in the main domain.
    39  	pciDeviceRegex = regex.MustCompile(`0000:([a-fA-F0-9]{2}|[a-fA-F0-9]{4}):[a-fA-F0-9]{2}\.[a-fA-F0-9]{1,2}`)
    40  	// Matches the directories for the main bus (i.e. pci000:00),
    41  	// individual devices (e.g. 00:00:04.0), accel (TPU v4), and vfio (TPU v5)
    42  	sysDevicesDirRegex = regex.MustCompile(`pci0000:00|accel|vfio|(0000:([a-fA-F0-9]{2}|[a-fA-F0-9]{4}):[a-fA-F0-9]{2}\.[a-fA-F0-9]{1,2})`)
    43  	// Files allowlisted for host passthrough. These files are read-only.
    44  	sysDevicesFiles = map[string]any{
    45  		"vendor": nil, "device": nil, "subsystem_vendor": nil, "subsystem_device": nil,
    46  		"revision": nil, "class": nil, "numa_node": nil,
    47  		"resource": nil, "pci_address": nil, "dev": nil, "driver_version": nil,
    48  		"reset_count": nil, "write_open_count": nil, "status": nil,
    49  		"is_device_owned": nil, "device_owner": nil, "framework_version": nil,
    50  		"user_mem_ranges": nil, "interrupt_counts": nil, "chip_model": nil,
    51  		"bar_offsets": nil, "bar_sizes": nil, "resource0": nil, "resource1": nil,
    52  		"resource2": nil, "resource3": nil, "resource4": nil, "resource5": nil,
    53  	}
    54  )
    55  
    56  // Creates TPU devices' symlinks under /sys/class/. TPU device types that are
    57  // not present on host will be ignored.
    58  //
    59  // TPU v4 symlinks are created at /sys/class/accel/accel#.
    60  // TPU v5 symlinks go to /sys/class/vfio-dev/vfio#.
    61  func (fs *filesystem) newDeviceClassDir(ctx context.Context, creds *auth.Credentials, tpuDeviceTypes []string, pciMainBusDevicePath string) (map[string]map[string]kernfs.Inode, error) {
    62  	dirs := map[string]map[string]kernfs.Inode{}
    63  	pciDents, err := hostDirEntries(pciMainBusDevicePath)
    64  	if err != nil {
    65  		return nil, err
    66  	}
    67  	for _, pciDent := range pciDents {
    68  		for _, tpuDeviceType := range tpuDeviceTypes {
    69  			subPath := path.Join(pciMainBusDevicePath, pciDent, tpuDeviceType)
    70  			dirs[tpuDeviceType] = map[string]kernfs.Inode{}
    71  			deviceDents, err := hostDirEntries(subPath)
    72  			if err != nil {
    73  				// Skips the path that doesn't exist.
    74  				if err == unix.ENOENT {
    75  					continue
    76  				}
    77  				return nil, err
    78  			}
    79  			if numOfDeviceDents := len(deviceDents); numOfDeviceDents != 1 {
    80  				return nil, fmt.Errorf("exactly one entry is expected at %v while there are %d", subPath, numOfDeviceDents)
    81  			}
    82  			dirs[tpuDeviceType][deviceDents[0]] = kernfs.NewStaticSymlink(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), fmt.Sprintf("../../devices/pci0000:00/%s/%s/%s", pciDent, tpuDeviceType, deviceDents[0]))
    83  		}
    84  	}
    85  	if len(dirs) == 0 {
    86  		return nil, errors.New("no TPU device sysfile is found")
    87  	}
    88  	return dirs, nil
    89  }
    90  
    91  // Create /sys/bus/pci/devices symlinks.
    92  func (fs *filesystem) newBusPCIDevicesDir(ctx context.Context, creds *auth.Credentials, pciMainBusDevicePath string) (map[string]kernfs.Inode, error) {
    93  	pciDevicesDir := map[string]kernfs.Inode{}
    94  	pciDents, err := hostDirEntries(pciMainBusDevicePath)
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	for _, pciDent := range pciDents {
    99  		pciDevicesDir[pciDent] = kernfs.NewStaticSymlink(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), fmt.Sprintf("../../../devices/pci0000:00/%s", pciDent))
   100  	}
   101  
   102  	return pciDevicesDir, nil
   103  }
   104  
   105  // Recursively build out sysfs directories according to the allowlisted files,
   106  // directories, and symlinks defined in this package.
   107  func (fs *filesystem) mirrorPCIBusDeviceDir(ctx context.Context, creds *auth.Credentials, dir string, iommuGroups map[string]string) (map[string]kernfs.Inode, error) {
   108  	subs := map[string]kernfs.Inode{}
   109  	dents, err := hostDirEntries(dir)
   110  	if err != nil {
   111  		return nil, err
   112  	}
   113  	for _, dent := range dents {
   114  		dentPath := path.Join(dir, dent)
   115  		dentMode, err := hostFileMode(dentPath)
   116  		if err != nil {
   117  			return nil, err
   118  		}
   119  		switch dentMode {
   120  		case unix.S_IFDIR:
   121  			if match := sysDevicesDirRegex.MatchString(dent); !match {
   122  				continue
   123  			}
   124  			contents, err := fs.mirrorPCIBusDeviceDir(ctx, creds, dentPath, iommuGroups)
   125  			if err != nil {
   126  				return nil, err
   127  			}
   128  			subs[dent] = fs.newDir(ctx, creds, defaultSysMode, contents)
   129  		case unix.S_IFREG:
   130  			if _, ok := sysDevicesFiles[dent]; ok {
   131  				subs[dent] = fs.newHostFile(ctx, creds, defaultSysMode, dentPath)
   132  			}
   133  		case unix.S_IFLNK:
   134  			linkContent := ""
   135  			switch {
   136  			case pciDeviceRegex.MatchString(dent) || dent == "device":
   137  				pciDeviceName, err := pciDeviceName(dir)
   138  				if err != nil {
   139  					return nil, err
   140  				}
   141  				// Both the device and PCI address entries are links to the original PCI
   142  				// device directory that's at the same place earlier in the dir tree.
   143  				linkContent = fmt.Sprintf("../../../%s", pciDeviceName)
   144  			case dent == "iommu_group":
   145  				pciDeviceName, err := pciDeviceName(dir)
   146  				if err != nil {
   147  					return nil, err
   148  				}
   149  				iommuGroupNum, exist := iommuGroups[pciDeviceName]
   150  				if !exist {
   151  					return nil, fmt.Errorf("no IOMMU group is found for device %v", pciDeviceName)
   152  				}
   153  				linkContent = fmt.Sprintf("../../../kernel/iommu_groups/%s", iommuGroupNum)
   154  			default:
   155  				continue
   156  			}
   157  			subs[dent] = kernfs.NewStaticSymlink(ctx, creds, linux.UNNAMED_MAJOR, fs.devMinor, fs.NextIno(), linkContent)
   158  		}
   159  	}
   160  	return subs, nil
   161  }
   162  
   163  // Infer a PCI device's name from its path.
   164  func pciDeviceName(pciDevicePath string) (string, error) {
   165  	pciDeviceName := pciDeviceRegex.FindString(pciDevicePath)
   166  	if pciDeviceName == "" {
   167  		return "", fmt.Errorf("no valid device name for the device path at %v", pciDevicePath)
   168  	}
   169  	return pciDeviceName, nil
   170  }
   171  
   172  func hostFileMode(path string) (uint32, error) {
   173  	fd, err := unix.Openat(-1, path, unix.O_RDONLY|unix.O_NOFOLLOW|unix.O_PATH, 0)
   174  	if err != nil {
   175  		return 0, err
   176  	}
   177  	stat := unix.Stat_t{}
   178  	if err := unix.Fstat(fd, &stat); err != nil {
   179  		return 0, err
   180  	}
   181  	return stat.Mode & unix.S_IFMT, nil
   182  }
   183  
   184  func hostDirEntries(path string) ([]string, error) {
   185  	fd, err := unix.Openat(-1, path, unix.O_RDONLY|unix.O_NOFOLLOW, 0)
   186  	if err != nil {
   187  		return nil, err
   188  	}
   189  	defer unix.Close(fd)
   190  	return fsutil.DirentNames(fd)
   191  }