github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/sentry/platform/kvm/kvm.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package kvm provides a kvm-based implementation of the platform interface. 16 package kvm 17 18 import ( 19 "fmt" 20 "os" 21 22 "golang.org/x/sys/unix" 23 "github.com/SagerNet/gvisor/pkg/hostarch" 24 "github.com/SagerNet/gvisor/pkg/ring0" 25 "github.com/SagerNet/gvisor/pkg/ring0/pagetables" 26 "github.com/SagerNet/gvisor/pkg/sentry/platform" 27 "github.com/SagerNet/gvisor/pkg/sync" 28 ) 29 30 // userMemoryRegion is a region of physical memory. 31 // 32 // This mirrors kvm_memory_region. 33 type userMemoryRegion struct { 34 slot uint32 35 flags uint32 36 guestPhysAddr uint64 37 memorySize uint64 38 userspaceAddr uint64 39 } 40 41 // runData is the run structure. This may be mapped for synchronous register 42 // access (although that doesn't appear to be supported by my kernel at least). 43 // 44 // This mirrors kvm_run. 45 type runData struct { 46 requestInterruptWindow uint8 47 _ [7]uint8 48 49 exitReason uint32 50 readyForInterruptInjection uint8 51 ifFlag uint8 52 _ [2]uint8 53 54 cr8 uint64 55 apicBase uint64 56 57 // This is the union data for exits. Interpretation depends entirely on 58 // the exitReason above (see vCPU code for more information). 59 data [32]uint64 60 } 61 62 // KVM represents a lightweight VM context. 63 type KVM struct { 64 platform.NoCPUPreemptionDetection 65 66 // KVM never changes mm_structs. 67 platform.UseHostProcessMemoryBarrier 68 69 // machine is the backing VM. 70 machine *machine 71 } 72 73 var ( 74 globalOnce sync.Once 75 globalErr error 76 ) 77 78 // OpenDevice opens the KVM device at /dev/kvm and returns the File. 79 func OpenDevice() (*os.File, error) { 80 f, err := os.OpenFile("/dev/kvm", unix.O_RDWR, 0) 81 if err != nil { 82 return nil, fmt.Errorf("error opening /dev/kvm: %v", err) 83 } 84 return f, nil 85 } 86 87 // New returns a new KVM-based implementation of the platform interface. 88 func New(deviceFile *os.File) (*KVM, error) { 89 fd := deviceFile.Fd() 90 91 // Ensure global initialization is done. 92 globalOnce.Do(func() { 93 globalErr = updateGlobalOnce(int(fd)) 94 }) 95 if globalErr != nil { 96 return nil, globalErr 97 } 98 99 // Create a new VM fd. 100 var ( 101 vm uintptr 102 errno unix.Errno 103 ) 104 for { 105 vm, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, _KVM_CREATE_VM, 0) 106 if errno == unix.EINTR { 107 continue 108 } 109 if errno != 0 { 110 return nil, fmt.Errorf("creating VM: %v", errno) 111 } 112 break 113 } 114 // We are done with the device file. 115 deviceFile.Close() 116 117 // Create a VM context. 118 machine, err := newMachine(int(vm)) 119 if err != nil { 120 return nil, err 121 } 122 123 // All set. 124 return &KVM{ 125 machine: machine, 126 }, nil 127 } 128 129 // SupportsAddressSpaceIO implements platform.Platform.SupportsAddressSpaceIO. 130 func (*KVM) SupportsAddressSpaceIO() bool { 131 return false 132 } 133 134 // CooperativelySchedulesAddressSpace implements platform.Platform.CooperativelySchedulesAddressSpace. 135 func (*KVM) CooperativelySchedulesAddressSpace() bool { 136 return false 137 } 138 139 // MapUnit implements platform.Platform.MapUnit. 140 func (*KVM) MapUnit() uint64 { 141 // We greedily creates PTEs in MapFile, so extremely large mappings can 142 // be expensive. Not _that_ expensive since we allow super pages, but 143 // even though can get out of hand if you're creating multi-terabyte 144 // mappings. For this reason, we limit mappings to an arbitrary 16MB. 145 return 16 << 20 146 } 147 148 // MinUserAddress returns the lowest available address. 149 func (*KVM) MinUserAddress() hostarch.Addr { 150 return hostarch.PageSize 151 } 152 153 // MaxUserAddress returns the first address that may not be used. 154 func (*KVM) MaxUserAddress() hostarch.Addr { 155 return hostarch.Addr(ring0.MaximumUserAddress) 156 } 157 158 // NewAddressSpace returns a new pagetable root. 159 func (k *KVM) NewAddressSpace(_ interface{}) (platform.AddressSpace, <-chan struct{}, error) { 160 // Allocate page tables and install system mappings. 161 pageTables := pagetables.NewWithUpper(newAllocator(), k.machine.upperSharedPageTables, ring0.KernelStartAddress) 162 163 // Return the new address space. 164 return &addressSpace{ 165 machine: k.machine, 166 pageTables: pageTables, 167 dirtySet: k.machine.newDirtySet(), 168 }, nil, nil 169 } 170 171 // NewContext returns an interruptible context. 172 func (k *KVM) NewContext() platform.Context { 173 return &context{ 174 machine: k.machine, 175 } 176 } 177 178 type constructor struct{} 179 180 func (*constructor) New(f *os.File) (platform.Platform, error) { 181 return New(f) 182 } 183 184 func (*constructor) OpenDevice() (*os.File, error) { 185 return OpenDevice() 186 } 187 188 // Flags implements platform.Constructor.Flags(). 189 func (*constructor) Requirements() platform.Requirements { 190 return platform.Requirements{} 191 } 192 193 func init() { 194 platform.Register("kvm", &constructor{}) 195 }