github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/device/device.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package device defines reserved virtual kernel devices and structures
    16  // for managing them.
    17  package device
    18  
    19  import (
    20  	"bytes"
    21  	"fmt"
    22  	"sync/atomic"
    23  
    24  	"github.com/SagerNet/gvisor/pkg/abi/linux"
    25  	"github.com/SagerNet/gvisor/pkg/sync"
    26  )
    27  
// Registry tracks all simple devices and related state on the system for
// save/restore.
//
// The set of devices across save/restore must remain consistent. That is, no
// devices may be created or removed on restore relative to the saved
// system. Practically, this means do not create new devices specifically as
// part of restore. LoadFrom panics if the two device sets differ.
//
// +stateify savable
type Registry struct {
	// lastAnonDeviceMinor is the last minor device number used for an anonymous
	// device. Must be accessed atomically.
	//
	// It is advanced by newAnonID, which is also used for IDs that are never
	// added to devices (see NewAnonMultiDevice), so it may exceed
	// len(devices).
	lastAnonDeviceMinor uint64

	// mu protects the fields below. It carries the state:"nosave" tag
	// (presumably excluding it from the saved state; confirm against the
	// stateify tool).
	mu sync.Mutex `state:"nosave"`

	// devices maps the ID of every registered Device to that Device. Entries
	// are added by newAnonDevice.
	devices map[ID]*Device
}
    47  
// SimpleDevices is the system-wide simple device registry. This is
// saved/restored by kernel.Kernel, but defined here to allow access without
// depending on the kernel package. See kernel.Kernel.deviceRegistry.
//
// NewAnonDevice and NewAnonMultiDevice allocate their device IDs from this
// registry.
var SimpleDevices = newRegistry()
    52  
    53  func newRegistry() *Registry {
    54  	return &Registry{
    55  		devices: make(map[ID]*Device),
    56  	}
    57  }
    58  
    59  // newAnonID assigns a major and minor number to an anonymous device ID.
    60  func (r *Registry) newAnonID() ID {
    61  	return ID{
    62  		// Anon devices always have a major number of 0.
    63  		Major: 0,
    64  		// Use the next minor number.
    65  		Minor: atomic.AddUint64(&r.lastAnonDeviceMinor, 1),
    66  	}
    67  }
    68  
    69  // newAnonDevice allocates a new anonymous device with a unique minor device
    70  // number, and registers it with r.
    71  func (r *Registry) newAnonDevice() *Device {
    72  	r.mu.Lock()
    73  	defer r.mu.Unlock()
    74  	d := &Device{
    75  		ID: r.newAnonID(),
    76  	}
    77  	r.devices[d.ID] = d
    78  	return d
    79  }
    80  
    81  // LoadFrom initializes the internal state of all devices in r from other. The
    82  // set of devices in both registries must match. Devices may not be created or
    83  // destroyed across save/restore.
    84  func (r *Registry) LoadFrom(other *Registry) {
    85  	r.mu.Lock()
    86  	defer r.mu.Unlock()
    87  	other.mu.Lock()
    88  	defer other.mu.Unlock()
    89  	if len(r.devices) != len(other.devices) {
    90  		panic(fmt.Sprintf("Devices were added or removed when restoring the registry:\nnew:\n%+v\nold:\n%+v", r.devices, other.devices))
    91  	}
    92  	for id, otherD := range other.devices {
    93  		ourD, ok := r.devices[id]
    94  		if !ok {
    95  			panic(fmt.Sprintf("Device %+v could not be restored as it wasn't defined in the new registry", otherD))
    96  		}
    97  		ourD.loadFrom(otherD)
    98  	}
    99  	atomic.StoreUint64(&r.lastAnonDeviceMinor, atomic.LoadUint64(&other.lastAnonDeviceMinor))
   100  }
   101  
// ID identifies a device by its major and minor device numbers. ID values are
// comparable and are used as the key of Registry.devices.
//
// +stateify savable
type ID struct {
	Major uint64
	Minor uint64
}
   109  
   110  // DeviceID formats a major and minor device number into a standard device number.
   111  func (i *ID) DeviceID() uint64 {
   112  	return uint64(linux.MakeDeviceID(uint16(i.Major), uint32(i.Minor)))
   113  }
   114  
// NewAnonDevice creates a new anonymous device and registers it with
// SimpleDevices. Packages that require an anonymous device should initialize
// the device in a global variable in a file called device.go:
//
//	var myDevice = device.NewAnonDevice()
func NewAnonDevice() *Device {
	return SimpleDevices.newAnonDevice()
}
   122  
   123  // NewAnonMultiDevice creates a new multi-keyed anonymous device. Packages that require
   124  // a multi-key anonymous device should initialize the device in a global variable in a
   125  // file called device.go:
   126  //
   127  // var myDevice = device.NewAnonMultiDevice()
   128  func NewAnonMultiDevice() *MultiDevice {
   129  	return &MultiDevice{
   130  		ID: SimpleDevices.newAnonID(),
   131  	}
   132  }
   133  
// Device is a simple virtual kernel device. It carries a device ID and a
// counter from which NextIno generates inode numbers.
//
// +stateify savable
type Device struct {
	ID

	// last is the last generated inode. It is read and written with atomic
	// operations (see NextIno and loadFrom).
	last uint64
}
   143  
   144  // loadFrom initializes d from other. The IDs of both devices must match.
   145  func (d *Device) loadFrom(other *Device) {
   146  	if d.ID != other.ID {
   147  		panic(fmt.Sprintf("Attempting to initialize a device %+v from %+v, but device IDs don't match", d, other))
   148  	}
   149  	atomic.StoreUint64(&d.last, atomic.LoadUint64(&other.last))
   150  }
   151  
   152  // NextIno generates a new inode number
   153  func (d *Device) NextIno() uint64 {
   154  	return atomic.AddUint64(&d.last, 1)
   155  }
   156  
   157  // MultiDeviceKey provides a hashable key for a MultiDevice. The key consists
   158  // of a raw device and inode for a resource, which must consistently identify
   159  // the unique resource.  It may optionally include a secondary device if
   160  // appropriate.
   161  //
   162  // Note that using the path is not enough, because filesystems may rename a file
   163  // to a different backing resource, at which point the path points to a different
   164  // entity.  Using only the inode is also not enough because the inode is assumed
   165  // to be unique only within the device on which the resource exists.
   166  type MultiDeviceKey struct {
   167  	Device          uint64
   168  	SecondaryDevice string
   169  	Inode           uint64
   170  }
   171  
   172  // String stringifies the key.
   173  func (m MultiDeviceKey) String() string {
   174  	return fmt.Sprintf("key{device: %d, sdevice: %s, inode: %d}", m.Device, m.SecondaryDevice, m.Inode)
   175  }
   176  
// MultiDevice allows for remapping resources that come from a variety of raw
// devices into a single device.  The device ID should be one of the static
// Device IDs above and cannot be reused.
//
// NOTE(review): no static Device IDs are declared in the visible part of this
// file; the only visible constructor, NewAnonMultiDevice, allocates the ID
// from SimpleDevices. Confirm whether the sentence above is stale.
//
// The zero value is usable: Map and Load create the caches on first use.
type MultiDevice struct {
	ID

	mu     sync.Mutex                // held by String, Map and Load while they access the fields below
	last   uint64                    // most recent virtual inode assigned by Map
	cache  map[MultiDeviceKey]uint64 // key -> virtual inode
	rcache map[uint64]MultiDeviceKey // virtual inode -> key; marks inodes that are already taken
}
   188  
   189  // String stringifies MultiDevice.
   190  func (m *MultiDevice) String() string {
   191  	m.mu.Lock()
   192  	defer m.mu.Unlock()
   193  
   194  	buf := bytes.NewBuffer(nil)
   195  	buf.WriteString("cache{")
   196  	for k, v := range m.cache {
   197  		buf.WriteString(fmt.Sprintf("%s -> %d, ", k, v))
   198  	}
   199  	buf.WriteString("}")
   200  	return buf.String()
   201  }
   202  
   203  // Map maps a raw device and inode into the inode space of MultiDevice,
   204  // returning a virtualized inode.  Raw devices and inodes can be reused;
   205  // in this case, the same virtual inode will be returned.
   206  func (m *MultiDevice) Map(key MultiDeviceKey) uint64 {
   207  	m.mu.Lock()
   208  	defer m.mu.Unlock()
   209  
   210  	if m.cache == nil {
   211  		m.cache = make(map[MultiDeviceKey]uint64)
   212  		m.rcache = make(map[uint64]MultiDeviceKey)
   213  	}
   214  
   215  	id, ok := m.cache[key]
   216  	if ok {
   217  		return id
   218  	}
   219  	// Step over reserved entries that may have been loaded.
   220  	idx := m.last + 1
   221  	for {
   222  		if _, ok := m.rcache[idx]; !ok {
   223  			break
   224  		}
   225  		idx++
   226  	}
   227  	// We found a non-reserved entry, use it.
   228  	m.last = idx
   229  	m.cache[key] = m.last
   230  	m.rcache[m.last] = key
   231  	return m.last
   232  }
   233  
   234  // Load loads a raw device and inode into MultiDevice inode mappings
   235  // with value as the virtual inode.
   236  //
   237  // By design, inodes start from 1 and continue until max uint64.  This means
   238  // that the zero value, which is often the uninitialized value, can be rejected
   239  // as invalid.
   240  func (m *MultiDevice) Load(key MultiDeviceKey, value uint64) bool {
   241  	// Reject the uninitialized value; see comment above.
   242  	if value == 0 {
   243  		return false
   244  	}
   245  
   246  	m.mu.Lock()
   247  	defer m.mu.Unlock()
   248  
   249  	if m.cache == nil {
   250  		m.cache = make(map[MultiDeviceKey]uint64)
   251  		m.rcache = make(map[uint64]MultiDeviceKey)
   252  	}
   253  
   254  	if val, exists := m.cache[key]; exists && val != value {
   255  		return false
   256  	}
   257  	if k, exists := m.rcache[value]; exists && k != key {
   258  		// Should never happen.
   259  		panic(fmt.Sprintf("MultiDevice's caches are inconsistent, current: %+v, previous: %+v", key, k))
   260  	}
   261  
   262  	// Cache value at key.
   263  	m.cache[key] = value
   264  
   265  	// Prevent value from being used by new inode mappings.
   266  	m.rcache[value] = key
   267  
   268  	return true
   269  }