github.com/metacubex/gvisor@v0.0.0-20240320004321-933faba989ec/pkg/sentry/platform/kvm/kvm.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package kvm provides a kvm-based implementation of the platform interface.
    16  package kvm
    17  
    18  import (
    19  	"fmt"
    20  	"os"
    21  
    22  	"golang.org/x/sys/unix"
    23  	pkgcontext "github.com/metacubex/gvisor/pkg/context"
    24  	"github.com/metacubex/gvisor/pkg/hostarch"
    25  	"github.com/metacubex/gvisor/pkg/ring0"
    26  	"github.com/metacubex/gvisor/pkg/ring0/pagetables"
    27  	"github.com/metacubex/gvisor/pkg/sentry/platform"
    28  	"github.com/metacubex/gvisor/pkg/sync"
    29  )
    30  
// userMemoryRegion is a region of physical memory.
//
// This mirrors the kernel's kvm_userspace_memory_region (the field set,
// including userspaceAddr, matches that struct rather than the older
// kvm_memory_region). Because it mirrors a kernel structure, the order and
// widths of the fields must not be changed.
type userMemoryRegion struct {
	slot          uint32
	flags         uint32
	guestPhysAddr uint64
	memorySize    uint64
	userspaceAddr uint64
}
    41  
// runData is the run structure. This may be mapped for synchronous register
// access (although that doesn't appear to be supported by my kernel at least).
//
// This mirrors kvm_run, so the order and sizes of the fields below, including
// the blank fields, must be kept exactly as they are.
type runData struct {
	requestInterruptWindow uint8
	// Blank field: never referenced by this package, it only reserves the
	// bytes between requestInterruptWindow and exitReason.
	_ [7]uint8

	exitReason                 uint32
	readyForInterruptInjection uint8
	ifFlag                     uint8
	// Blank field: reserves the bytes between ifFlag and cr8.
	_ [2]uint8

	cr8      uint64
	apicBase uint64

	// This is the union data for exits. Interpretation depends entirely on
	// the exitReason above (see vCPU code for more information).
	//
	// It is declared as 32 uint64 words, i.e. 256 bytes.
	data [32]uint64
}
    62  
// KVM represents a lightweight VM context.
//
// It embeds the platform.NoCPUPreemptionDetection,
// platform.UseHostProcessMemoryBarrier and platform.DoesOwnPageTables types
// and holds the machine created by New.
type KVM struct {
	platform.NoCPUPreemptionDetection

	// KVM never changes mm_structs.
	platform.UseHostProcessMemoryBarrier

	platform.DoesOwnPageTables

	// machine is the backing VM. The same machine is handed to every address
	// space and context created from this platform.
	machine *machine
}
    75  
var (
	// globalOnce guards the package-wide initialization that New runs on its
	// first call.
	globalOnce sync.Once

	// globalErr holds the result of that one-time initialization. It is set
	// inside globalOnce.Do and, when non-nil, is returned by every call to
	// New.
	globalErr error
)
    80  
    81  // OpenDevice opens the KVM device and returns the File.
    82  // If the devicePath is empty, it will default to /dev/kvm.
    83  func OpenDevice(devicePath string) (*os.File, error) {
    84  	if devicePath == "" {
    85  		devicePath = "/dev/kvm"
    86  	}
    87  	f, err := os.OpenFile(devicePath, unix.O_RDWR, 0)
    88  	if err != nil {
    89  		return nil, fmt.Errorf("error opening KVM device file (%s): %v", devicePath, err)
    90  	}
    91  	return f, nil
    92  }
    93  
    94  // New returns a new KVM-based implementation of the platform interface.
    95  func New(deviceFile *os.File) (*KVM, error) {
    96  	fd := deviceFile.Fd()
    97  
    98  	// Ensure global initialization is done.
    99  	globalOnce.Do(func() {
   100  		globalErr = updateGlobalOnce(int(fd))
   101  	})
   102  	if globalErr != nil {
   103  		return nil, globalErr
   104  	}
   105  
   106  	// Create a new VM fd.
   107  	var (
   108  		vm    uintptr
   109  		errno unix.Errno
   110  	)
   111  	for {
   112  		vm, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, KVM_CREATE_VM, 0)
   113  		if errno == unix.EINTR {
   114  			continue
   115  		}
   116  		if errno != 0 {
   117  			return nil, fmt.Errorf("creating VM: %v", errno)
   118  		}
   119  		break
   120  	}
   121  	// We are done with the device file.
   122  	deviceFile.Close()
   123  
   124  	// Create a VM context.
   125  	machine, err := newMachine(int(vm))
   126  	if err != nil {
   127  		return nil, err
   128  	}
   129  
   130  	// All set.
   131  	return &KVM{
   132  		machine: machine,
   133  	}, nil
   134  }
   135  
// SupportsAddressSpaceIO implements platform.Platform.SupportsAddressSpaceIO.
//
// It always reports false.
func (*KVM) SupportsAddressSpaceIO() bool {
	return false
}
   140  
// CooperativelySchedulesAddressSpace implements
// platform.Platform.CooperativelySchedulesAddressSpace.
//
// It always reports false.
func (*KVM) CooperativelySchedulesAddressSpace() bool {
	return false
}
   145  
   146  // MapUnit implements platform.Platform.MapUnit.
   147  func (*KVM) MapUnit() uint64 {
   148  	// We greedily creates PTEs in MapFile, so extremely large mappings can
   149  	// be expensive. Not _that_ expensive since we allow super pages, but
   150  	// even though can get out of hand if you're creating multi-terabyte
   151  	// mappings. For this reason, we limit mappings to an arbitrary 16MB.
   152  	return 16 << 20
   153  }
   154  
// MinUserAddress returns the lowest available address, which is
// hostarch.PageSize (i.e. the first page is never available).
func (*KVM) MinUserAddress() hostarch.Addr {
	return hostarch.PageSize
}
   159  
// MaxUserAddress returns the first address that may not be used. The value is
// ring0.MaximumUserAddress converted to a hostarch.Addr.
func (*KVM) MaxUserAddress() hostarch.Addr {
	return hostarch.Addr(ring0.MaximumUserAddress)
}
   164  
   165  // NewAddressSpace returns a new pagetable root.
   166  func (k *KVM) NewAddressSpace(any) (platform.AddressSpace, <-chan struct{}, error) {
   167  	// Allocate page tables and install system mappings.
   168  	pageTables := pagetables.NewWithUpper(newAllocator(), k.machine.upperSharedPageTables, ring0.KernelStartAddress)
   169  
   170  	// Return the new address space.
   171  	return &addressSpace{
   172  		machine:    k.machine,
   173  		pageTables: pageTables,
   174  		dirtySet:   k.machine.newDirtySet(),
   175  	}, nil, nil
   176  }
   177  
   178  // NewContext returns an interruptible context.
   179  func (k *KVM) NewContext(pkgcontext.Context) platform.Context {
   180  	return &platformContext{
   181  		machine: k.machine,
   182  	}
   183  }
   184  
   185  type constructor struct{}
   186  
   187  func (*constructor) New(f *os.File) (platform.Platform, error) {
   188  	return New(f)
   189  }
   190  
   191  func (*constructor) OpenDevice(devicePath string) (*os.File, error) {
   192  	return OpenDevice(devicePath)
   193  }
   194  
   195  // Flags implements platform.Constructor.Flags().
   196  func (*constructor) Requirements() platform.Requirements {
   197  	return platform.Requirements{}
   198  }
   199  
// init registers the KVM constructor with the platform package under the
// name "kvm".
func init() {
	platform.Register("kvm", &constructor{})
}