github.com/kata-containers/runtime@v0.0.0-20210505125100-04f29832a923/virtcontainers/device/config/config.go (about)

     1  // Copyright (c) 2017-2018 Intel Corporation
     2  // Copyright (c) 2018 Huawei Corporation
     3  //
     4  // SPDX-License-Identifier: Apache-2.0
     5  //
     6  
     7  package config
     8  
     9  import (
    10  	"fmt"
    11  	"io/ioutil"
    12  	"os"
    13  	"path/filepath"
    14  	"strconv"
    15  	"strings"
    16  
    17  	"github.com/go-ini/ini"
    18  	vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types"
    19  	"golang.org/x/sys/unix"
    20  )
    21  
// DeviceType indicates device type. It is a string-backed type; the typed
// constants below (DeviceVFIO, DeviceBlock, DeviceGeneric) are its named values.
type DeviceType string

const (
	// DeviceVFIO is the VFIO device type
	DeviceVFIO DeviceType = "vfio"

	// DeviceBlock is the block device type
	DeviceBlock DeviceType = "block"

	// DeviceGeneric is a generic device type
	DeviceGeneric DeviceType = "generic"

	// VhostUserSCSI - SCSI based vhost-user type.
	// Note: this and the following VhostUser* constants are untyped string
	// constants, not explicitly typed as DeviceType.
	VhostUserSCSI = "vhost-user-scsi-pci"

	// VhostUserNet - net based vhost-user type.
	// Note: the value is "virtio-net-pci", unlike the other "vhost-user-*" values.
	VhostUserNet = "virtio-net-pci"

	// VhostUserBlk represents a block vhostuser device type
	VhostUserBlk = "vhost-user-blk-pci"

	// VhostUserFS represents a virtio-fs vhostuser device type
	VhostUserFS = "vhost-user-fs-pci"
)
    47  
// Names of the block device drivers that can be selected for drives.
const (
	// VirtioMmio means use virtio-mmio for mmio based drives
	VirtioMmio = "virtio-mmio"

	// VirtioBlock means use virtio-blk for hotplugging drives
	VirtioBlock = "virtio-blk"

	// VirtioBlockCCW means use virtio-blk-ccw for hotplugging drives
	VirtioBlockCCW = "virtio-blk-ccw"

	// VirtioSCSI means use virtio-scsi for hotplugging drives
	VirtioSCSI = "virtio-scsi"

	// Nvdimm means use nvdimm for hotplugging drives
	Nvdimm = "nvdimm"
)
    64  
// Names of the shared file system implementations.
const (
	// Virtio9P means use virtio-9p for the shared file system
	Virtio9P = "virtio-9p"

	// VirtioFS means use virtio-fs for the shared file system
	VirtioFS = "virtio-fs"
)
    72  
const (
	// The OCI spec requires the major-minor number to be provided for a
	// device. We have chosen the below major numbers to represent
	// vhost-user devices.
	//
	// VhostUserBlkMajor is the major number chosen for vhost-user-blk devices
	// and VhostUserSCSIMajor the one chosen for vhost-user-scsi devices.
	// GetHostPath compares a block device's major number against these two
	// values to recognise vhost-user storage devices.
	VhostUserBlkMajor  = 241
	VhostUserSCSIMajor = 242
)
    80  
// The sysfs locations below are defined as variables instead of constants
// to allow overriding them in the tests.

// SysDevPrefix is the sysfs directory holding per-device entries, /sys/dev.
// getSysDevPathImpl builds device paths underneath it.
var SysDevPrefix = "/sys/dev"

// SysIOMMUPath is the sysfs directory of IOMMU groups, /sys/kernel/iommu_groups.
var SysIOMMUPath = "/sys/kernel/iommu_groups"

// SysBusPciDevicesPath is the sysfs directory of PCI devices, /sys/bus/pci/devices.
var SysBusPciDevicesPath = "/sys/bus/pci/devices"

// getSysDevPath resolves the sysfs path of a device. It defaults to
// getSysDevPathImpl and is called by GetHostPath and getBackingFile.
// NOTE(review): presumably a variable so tests can substitute it — confirm.
var getSysDevPath = getSysDevPathImpl
    94  
// DeviceInfo is an embedded type that contains device data common to all types of devices.
type DeviceInfo struct {
	// HostPath is device path on host
	HostPath string

	// ContainerPath is device path inside container.
	// The `json:"-"` tag excludes it from JSON encoding.
	ContainerPath string `json:"-"`

	// Type of device: c, b, u or p
	// c , u - character(unbuffered)
	// p - FIFO
	// b - block(buffered) special file
	// More info in mknod(1).
	DevType string

	// Major, minor numbers for device.
	Major int64
	Minor int64

	// Pmem enabled persistent memory. Use HostPath as backing file
	// for a nvdimm device in the guest.
	Pmem bool

	// If applicable, should this device be considered RO
	ReadOnly bool

	// ColdPlug specifies whether the device must be cold plugged (true)
	// or hot plugged (false).
	ColdPlug bool

	// FileMode permission bits for the device.
	FileMode os.FileMode

	// UID is the id of the device owner.
	UID uint32

	// GID is the id of the device group.
	GID uint32

	// ID for the device that is passed to the hypervisor.
	ID string

	// DriverOptions is specific options for each device driver
	// for example, for BlockDevice, we can set DriverOptions["blockDriver"]="virtio-blk"
	DriverOptions map[string]string
}
   141  
// BlockDrive represents a block storage drive which may be used in case the storage
// driver has an underlying block storage device.
type BlockDrive struct {
	// File is the path to the disk-image/device which will be used with this drive
	File string

	// Format of the drive
	Format string

	// ID is used to identify this drive in the hypervisor options.
	ID string

	// Index assigned to the drive. In case of virtio-scsi, this is used as SCSI LUN index
	Index int

	// MmioAddr is used to identify the slot at which the drive is attached.
	// NOTE(review): the original comment asked whether this encodes an order — unresolved.
	MmioAddr string

	// PCIPath is the PCI path used to identify the slot at which the drive is attached.
	PCIPath vcTypes.PciPath

	// SCSI Address of the block device, in case the device is attached using SCSI driver
	// SCSI address is in the format SCSI-Id:LUN
	SCSIAddr string

	// NvdimmID is the nvdimm id inside the VM
	NvdimmID string

	// VirtPath at which the device appears inside the VM, outside of the container mount namespace
	VirtPath string

	// DevNo identifies the css bus id for virtio-blk-ccw
	DevNo string

	// ShareRW enables multiple qemu instances to share the File
	ShareRW bool

	// ReadOnly sets the device file readonly
	ReadOnly bool

	// Pmem enables persistent memory. Use File as backing file
	// for a nvdimm device in the guest
	Pmem bool
}
   186  
// VFIODeviceType indicates VFIO device type
type VFIODeviceType uint32

const (
	// VFIODeviceErrorType is the error type of VFIO device.
	// It is the first iota value (0), i.e. the zero value of VFIODeviceType.
	VFIODeviceErrorType VFIODeviceType = iota

	// VFIODeviceNormalType is a normal VFIO device type
	VFIODeviceNormalType

	// VFIODeviceMediatedType is a VFIO mediated device type
	VFIODeviceMediatedType
)
   200  
// VFIODev represents a VFIO drive used for hotplugging
type VFIODev struct {
	// IsPCIe specifies device is PCIe or PCI
	IsPCIe bool

	// Type of VFIO device
	Type VFIODeviceType

	// ID is used to identify this drive in the hypervisor options.
	ID string

	// BDF (Bus:Device.Function) of the PCI address
	BDF string

	// SysfsDev is the sysfsdev of VFIO mediated device
	SysfsDev string

	// VendorID specifies vendor id
	VendorID string

	// DeviceID specifies device id
	DeviceID string

	// Class is the PCI Class Code
	Class string

	// Bus of VFIO PCIe device
	Bus string
}
   230  
// RNGDev represents a random number generator device
type RNGDev struct {
	// ID is used to identify the device in the hypervisor options.
	ID string

	// Filename is the file to use as entropy source.
	Filename string
}
   238  
// VhostUserDeviceAttrs represents data shared by most vhost-user devices
type VhostUserDeviceAttrs struct {
	// DevID, SocketPath and Type are not restricted to one vhost-user flavour.
	// NOTE(review): presumably DevID is the device identifier handed to the
	// hypervisor and SocketPath the vhost-user socket path — confirm against callers.
	DevID      string
	SocketPath string
	Type       DeviceType

	// MacAddress is only meaningful for vhost user net device
	MacAddress string

	// These are only meaningful for vhost user fs devices
	Tag       string
	CacheSize uint32
	Cache     string

	// PCIPath is the PCI path used to identify the slot at which
	// the drive is attached.  It is only meaningful for vhost
	// user block devices
	PCIPath vcTypes.PciPath

	// Index is the block index of the device if assigned
	Index int
}
   261  
// GetHostPathFunc is a function variable, defaulting to GetHostPath, used to
// mock GetHostPath in tests.
var GetHostPathFunc = GetHostPath

// GetVhostUserNodeStatFunc is a function variable, defaulting to
// GetVhostUserNodeStat, used to mock GetVhostUserNodeStat in tests. Through
// this function, the caller obtains the stat data (including the device
// number) of a device node; getVhostUserDevName calls it for that purpose.
var GetVhostUserNodeStatFunc = GetVhostUserNodeStat
   268  
   269  // GetHostPath is used to fetch the host path for the device.
   270  // The path passed in the spec refers to the path that should appear inside the container.
   271  // We need to find the actual device path on the host based on the major-minor numbers of the device.
   272  func GetHostPath(devInfo DeviceInfo, vhostUserStoreEnabled bool, vhostUserStorePath string) (string, error) {
   273  	if devInfo.ContainerPath == "" {
   274  		return "", fmt.Errorf("Empty path provided for device")
   275  	}
   276  
   277  	// Filter out vhost-user storage devices by device Major numbers.
   278  	if vhostUserStoreEnabled && devInfo.DevType == "b" &&
   279  		(devInfo.Major == VhostUserSCSIMajor || devInfo.Major == VhostUserBlkMajor) {
   280  		return getVhostUserHostPath(devInfo, vhostUserStorePath)
   281  	}
   282  
   283  	ueventPath := filepath.Join(getSysDevPath(devInfo), "uevent")
   284  	if _, err := os.Stat(ueventPath); err != nil {
   285  		// Some devices(eg. /dev/fuse, /dev/cuse) do not always implement sysfs interface under /sys/dev
   286  		// These devices are passed by default by docker.
   287  		//
   288  		// Simply return the path passed in the device configuration, this does mean that no device renames are
   289  		// supported for these devices.
   290  
   291  		if os.IsNotExist(err) {
   292  			return devInfo.ContainerPath, nil
   293  		}
   294  
   295  		return "", err
   296  	}
   297  
   298  	content, err := ini.Load(ueventPath)
   299  	if err != nil {
   300  		return "", err
   301  	}
   302  
   303  	devName, err := content.Section("").GetKey("DEVNAME")
   304  	if err != nil {
   305  		return "", err
   306  	}
   307  
   308  	return filepath.Join("/dev", devName.String()), nil
   309  }
   310  
   311  // getBackingFile is used to fetch the backing file for the device.
   312  func getBackingFile(devInfo DeviceInfo) (string, error) {
   313  	backingFilePath := filepath.Join(getSysDevPath(devInfo), "loop", "backing_file")
   314  	data, err := ioutil.ReadFile(backingFilePath)
   315  	if err != nil {
   316  		return "", err
   317  	}
   318  
   319  	return strings.TrimSpace(string(data)), nil
   320  }
   321  
   322  func getSysDevPathImpl(devInfo DeviceInfo) string {
   323  	var pathComp string
   324  
   325  	switch devInfo.DevType {
   326  	case "c", "u":
   327  		pathComp = "char"
   328  	case "b":
   329  		pathComp = "block"
   330  	default:
   331  		// Unsupported device types. Return nil error to ignore devices
   332  		// that cannot be handled currently.
   333  		return ""
   334  	}
   335  
   336  	format := strconv.FormatInt(devInfo.Major, 10) + ":" + strconv.FormatInt(devInfo.Minor, 10)
   337  	return filepath.Join(SysDevPrefix, pathComp, format)
   338  }
   339  
   340  // getVhostUserHostPath is used to fetch host path for the vhost-user device.
   341  // For vhost-user block device like vhost-user-blk or vhost-user-scsi, its
   342  // socket should be under directory "<vhostUserStorePath>/block/sockets/";
   343  // its corresponding device node should be under directory
   344  // "<vhostUserStorePath>/block/devices/"
   345  func getVhostUserHostPath(devInfo DeviceInfo, vhostUserStorePath string) (string, error) {
   346  	vhostUserDevNodePath := filepath.Join(vhostUserStorePath, "/block/devices/")
   347  	vhostUserSockPath := filepath.Join(vhostUserStorePath, "/block/sockets/")
   348  
   349  	sockFileName, err := getVhostUserDevName(vhostUserDevNodePath,
   350  		uint32(devInfo.Major), uint32(devInfo.Minor))
   351  	if err != nil {
   352  		return "", err
   353  	}
   354  
   355  	// Locate socket path of vhost-user device
   356  	sockFilePath := filepath.Join(vhostUserSockPath, sockFileName)
   357  	if _, err = os.Stat(sockFilePath); os.IsNotExist(err) {
   358  		return "", err
   359  	}
   360  
   361  	return sockFilePath, nil
   362  }
   363  
   364  func GetVhostUserNodeStat(devNodePath string, devNodeStat *unix.Stat_t) (err error) {
   365  	return unix.Stat(devNodePath, devNodeStat)
   366  }
   367  
   368  // Filter out name of the device node whose device type is Major:Minor from directory
   369  func getVhostUserDevName(dirname string, majorNum, minorNum uint32) (string, error) {
   370  	files, err := ioutil.ReadDir(dirname)
   371  	if err != nil {
   372  		return "", err
   373  	}
   374  
   375  	for _, file := range files {
   376  		var devStat unix.Stat_t
   377  
   378  		devFilePath := filepath.Join(dirname, file.Name())
   379  		err = GetVhostUserNodeStatFunc(devFilePath, &devStat)
   380  		if err != nil {
   381  			return "", err
   382  		}
   383  
   384  		devMajor := unix.Major(devStat.Rdev)
   385  		devMinor := unix.Minor(devStat.Rdev)
   386  		if devMajor == majorNum && devMinor == minorNum {
   387  			return file.Name(), nil
   388  		}
   389  	}
   390  
   391  	return "", fmt.Errorf("Required device node (%d:%d) doesn't exist under directory %s",
   392  		majorNum, minorNum, dirname)
   393  }