github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/ring0/x86.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // +build 386 amd64 16 17 package ring0 18 19 import ( 20 "github.com/SagerNet/gvisor/pkg/cpuid" 21 ) 22 23 // Useful bits. 24 const ( 25 _CR0_PE = 1 << 0 26 _CR0_ET = 1 << 4 27 _CR0_AM = 1 << 18 28 _CR0_PG = 1 << 31 29 30 _CR4_PSE = 1 << 4 31 _CR4_PAE = 1 << 5 32 _CR4_PGE = 1 << 7 33 _CR4_OSFXSR = 1 << 9 34 _CR4_OSXMMEXCPT = 1 << 10 35 _CR4_FSGSBASE = 1 << 16 36 _CR4_PCIDE = 1 << 17 37 _CR4_OSXSAVE = 1 << 18 38 _CR4_SMEP = 1 << 20 39 40 _RFLAGS_AC = 1 << 18 41 _RFLAGS_NT = 1 << 14 42 _RFLAGS_IOPL0 = 1 << 12 43 _RFLAGS_IOPL1 = 1 << 13 44 _RFLAGS_IOPL = _RFLAGS_IOPL0 | _RFLAGS_IOPL1 45 _RFLAGS_DF = 1 << 10 46 _RFLAGS_IF = 1 << 9 47 _RFLAGS_STEP = 1 << 8 48 _RFLAGS_RESERVED = 1 << 1 49 50 _EFER_SCE = 0x001 51 _EFER_LME = 0x100 52 _EFER_LMA = 0x400 53 _EFER_NX = 0x800 54 55 _MSR_STAR = 0xc0000081 56 _MSR_LSTAR = 0xc0000082 57 _MSR_CSTAR = 0xc0000083 58 _MSR_SYSCALL_MASK = 0xc0000084 59 _MSR_PLATFORM_INFO = 0xce 60 _MSR_MISC_FEATURES = 0x140 61 62 _PLATFORM_INFO_CPUID_FAULT = 1 << 31 63 64 _MISC_FEATURE_CPUID_TRAP = 0x1 65 ) 66 67 const ( 68 // KernelFlagsSet should always be set in the kernel. 69 KernelFlagsSet = _RFLAGS_RESERVED 70 71 // UserFlagsSet are always set in userspace. 
72 // 73 // _RFLAGS_IOPL is a set of two bits and it shows the I/O privilege 74 // level. The Current Privilege Level (CPL) of the task must be less 75 // than or equal to the IOPL in order for the task or program to access 76 // I/O ports. 77 // 78 // Here, _RFLAGS_IOPL0 is used only to determine whether the task is 79 // running in the kernel or userspace mode. In the user mode, the CPL is 80 // always 3 and it doesn't matter what IOPL is set if it is bellow CPL. 81 // 82 // We need to have one bit which will be always different in user and 83 // kernel modes. And we have to remember that even though we have 84 // KernelFlagsClear, we still can see some of these flags in the kernel 85 // mode. This can happen when the goruntime switches on a goroutine 86 // which has been saved in the host mode. On restore, the popf 87 // instruction is used to restore flags and this means that all flags 88 // what the goroutine has in the host mode will be restored in the 89 // kernel mode. 90 // 91 // _RFLAGS_IOPL0 is never set in host and kernel modes and we always set 92 // it in the user mode. So if this flag is set, the task is running in 93 // the user mode and if it isn't set, the task is running in the kernel 94 // mode. 95 UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF | _RFLAGS_IOPL0 96 97 // KernelFlagsClear should always be clear in the kernel. 98 KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT 99 100 // UserFlagsClear are always cleared in userspace. 101 UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL1 102 ) 103 104 // IsKernelFlags returns true if rflags coresponds to the kernel mode. 105 // 106 // go:nosplit 107 func IsKernelFlags(rflags uint64) bool { 108 return rflags&_RFLAGS_IOPL0 == 0 109 } 110 111 // Vector is an exception vector. 112 type Vector uintptr 113 114 // Exception vectors. 
115 const ( 116 DivideByZero Vector = iota 117 Debug 118 NMI 119 Breakpoint 120 Overflow 121 BoundRangeExceeded 122 InvalidOpcode 123 DeviceNotAvailable 124 DoubleFault 125 CoprocessorSegmentOverrun 126 InvalidTSS 127 SegmentNotPresent 128 StackSegmentFault 129 GeneralProtectionFault 130 PageFault 131 _ 132 X87FloatingPointException 133 AlignmentCheck 134 MachineCheck 135 SIMDFloatingPointException 136 VirtualizationException 137 SecurityException = 0x1e 138 SyscallInt80 = 0x80 139 _NR_INTERRUPTS = 0x100 140 ) 141 142 // System call vectors. 143 const ( 144 Syscall Vector = _NR_INTERRUPTS 145 ) 146 147 // VirtualAddressBits returns the number bits available for virtual addresses. 148 // 149 // Note that sign-extension semantics apply to the highest order bit. 150 // 151 // FIXME(b/69382326): This should use the cpuid passed to Init. 152 func VirtualAddressBits() uint32 { 153 ax, _, _, _ := cpuid.HostID(0x80000008, 0) 154 return (ax >> 8) & 0xff 155 } 156 157 // PhysicalAddressBits returns the number of bits available for physical addresses. 158 // 159 // FIXME(b/69382326): This should use the cpuid passed to Init. 160 func PhysicalAddressBits() uint32 { 161 ax, _, _, _ := cpuid.HostID(0x80000008, 0) 162 return ax & 0xff 163 } 164 165 // Selector is a segment Selector. 166 type Selector uint16 167 168 // SegmentDescriptor is a segment descriptor. 169 type SegmentDescriptor struct { 170 bits [2]uint32 171 } 172 173 // descriptorTable is a collection of descriptors. 174 type descriptorTable [32]SegmentDescriptor 175 176 // SegmentDescriptorFlags are typed flags within a descriptor. 177 type SegmentDescriptorFlags uint32 178 179 // SegmentDescriptorFlag declarations. 180 const ( 181 SegmentDescriptorAccess SegmentDescriptorFlags = 1 << 8 // Access bit (always set). 182 SegmentDescriptorWrite = 1 << 9 // Write permission. 183 SegmentDescriptorExpandDown = 1 << 10 // Grows down, not used. 184 SegmentDescriptorExecute = 1 << 11 // Execute permission. 
185 SegmentDescriptorSystem = 1 << 12 // Zero => system, 1 => user code/data. 186 SegmentDescriptorPresent = 1 << 15 // Present. 187 SegmentDescriptorAVL = 1 << 20 // Available. 188 SegmentDescriptorLong = 1 << 21 // Long mode. 189 SegmentDescriptorDB = 1 << 22 // 16 or 32-bit. 190 SegmentDescriptorG = 1 << 23 // Granularity: page or byte. 191 ) 192 193 // Base returns the descriptor's base linear address. 194 func (d *SegmentDescriptor) Base() uint32 { 195 return d.bits[1]&0xFF000000 | (d.bits[1]&0x000000FF)<<16 | d.bits[0]>>16 196 } 197 198 // Limit returns the descriptor size. 199 func (d *SegmentDescriptor) Limit() uint32 { 200 l := d.bits[0]&0xFFFF | d.bits[1]&0xF0000 201 if d.bits[1]&uint32(SegmentDescriptorG) != 0 { 202 l <<= 12 203 l |= 0xFFF 204 } 205 return l 206 } 207 208 // Flags returns descriptor flags. 209 func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags { 210 return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00) 211 } 212 213 // DPL returns the descriptor privilege level. 
214 func (d *SegmentDescriptor) DPL() int { 215 return int((d.bits[1] >> 13) & 3) 216 } 217 218 func (d *SegmentDescriptor) setNull() { 219 d.bits[0] = 0 220 d.bits[1] = 0 221 } 222 223 func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) { 224 flags |= SegmentDescriptorPresent 225 if limit>>12 != 0 { 226 limit >>= 12 227 flags |= SegmentDescriptorG 228 } 229 d.bits[0] = base<<16 | limit&0xFFFF 230 d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13 231 } 232 233 func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) { 234 d.set(base, limit, dpl, 235 SegmentDescriptorDB| 236 SegmentDescriptorExecute| 237 SegmentDescriptorSystem) 238 } 239 240 func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) { 241 d.set(base, limit, dpl, 242 SegmentDescriptorG| 243 SegmentDescriptorLong| 244 SegmentDescriptorExecute| 245 SegmentDescriptorSystem) 246 } 247 248 func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) { 249 d.set(base, limit, dpl, 250 SegmentDescriptorWrite| 251 SegmentDescriptorSystem) 252 } 253 254 // setHi is only used for the TSS segment, which is magically 64-bits. 255 func (d *SegmentDescriptor) setHi(base uint32) { 256 d.bits[0] = base 257 d.bits[1] = 0 258 } 259 260 // Gate64 is a 64-bit task, trap, or interrupt gate. 261 type Gate64 struct { 262 bits [4]uint32 263 } 264 265 // idt64 is a 64-bit interrupt descriptor table. 266 type idt64 [_NR_INTERRUPTS]Gate64 267 268 func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) { 269 g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF 270 g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7 271 g.bits[2] = uint32(rip >> 32) 272 } 273 274 func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) { 275 g.setInterrupt(cs, rip, dpl, ist) 276 g.bits[1] |= 1 << 8 277 } 278 279 // TaskState64 is a 64-bit task state structure. 
type TaskState64 struct {
	// Every field is a uint32 or uint16, so the struct has no implicit
	// padding and is 104 bytes long. Each Lo/Hi pair presumably carries the
	// low and high halves of one 64-bit value.
	// NOTE(review): confirm field meanings against the hardware manual
	// before reordering anything here.

	// Offset 0: unnamed word.
	_ uint32

	// Offsets 4-27: rsp0 through rsp2.
	rsp0Lo uint32
	rsp0Hi uint32
	rsp1Lo uint32
	rsp1Hi uint32
	rsp2Lo uint32
	rsp2Hi uint32

	// Offsets 28-35: unnamed words.
	_ [2]uint32

	// Offsets 36-91: ist1 through ist7.
	ist1Lo uint32
	ist1Hi uint32
	ist2Lo uint32
	ist2Hi uint32
	ist3Lo uint32
	ist3Hi uint32
	ist4Lo uint32
	ist4Hi uint32
	ist5Lo uint32
	ist5Hi uint32
	ist6Lo uint32
	ist6Hi uint32
	ist7Lo uint32
	ist7Hi uint32

	// Offsets 92-101: unnamed space, then ioPerm in the final two bytes.
	_      [2]uint32
	_      uint16
	ioPerm uint16
}