gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/ring0/x86.go (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //go:build 386 || amd64 16 // +build 386 amd64 17 18 package ring0 19 20 // Useful bits. 21 const ( 22 _CR0_PE = 1 << 0 23 _CR0_ET = 1 << 4 24 _CR0_NE = 1 << 5 25 _CR0_AM = 1 << 18 26 _CR0_PG = 1 << 31 27 28 _CR4_PSE = 1 << 4 29 _CR4_PAE = 1 << 5 30 _CR4_PGE = 1 << 7 31 _CR4_OSFXSR = 1 << 9 32 _CR4_OSXMMEXCPT = 1 << 10 33 _CR4_FSGSBASE = 1 << 16 34 _CR4_PCIDE = 1 << 17 35 _CR4_OSXSAVE = 1 << 18 36 _CR4_SMEP = 1 << 20 37 _CR4_SMAP = 1 << 21 38 39 _RFLAGS_AC = 1 << 18 40 _RFLAGS_NT = 1 << 14 41 _RFLAGS_IOPL0 = 1 << 12 42 _RFLAGS_IOPL1 = 1 << 13 43 _RFLAGS_IOPL = _RFLAGS_IOPL0 | _RFLAGS_IOPL1 44 _RFLAGS_DF = 1 << 10 45 _RFLAGS_IF = 1 << 9 46 _RFLAGS_STEP = 1 << 8 47 _RFLAGS_RESERVED = 1 << 1 48 49 _EFER_SCE = 0x001 50 _EFER_LME = 0x100 51 _EFER_LMA = 0x400 52 _EFER_NX = 0x800 53 54 _MSR_STAR = 0xc0000081 55 _MSR_LSTAR = 0xc0000082 56 _MSR_CSTAR = 0xc0000083 57 _MSR_SYSCALL_MASK = 0xc0000084 58 _MSR_PLATFORM_INFO = 0xce 59 _MSR_MISC_FEATURES = 0x140 60 61 _PLATFORM_INFO_CPUID_FAULT = 1 << 31 62 63 _MISC_FEATURE_CPUID_TRAP = 0x1 64 ) 65 66 const ( 67 // KernelFlagsSet should always be set in the kernel. 68 KernelFlagsSet = _RFLAGS_RESERVED 69 70 // UserFlagsSet are always set in userspace. 71 // 72 // _RFLAGS_IOPL is a set of two bits and it shows the I/O privilege 73 // level. The Current Privilege Level (CPL) of the task must be less 74 // than or equal to the IOPL in order for the task or program to access 75 // I/O ports. 76 // 77 // Here, _RFLAGS_IOPL0 is used only to determine whether the task is 78 // running in the kernel or userspace mode. In the user mode, the CPL is 79 // always 3 and it doesn't matter what IOPL is set if it is below CPL. 80 // 81 // We need to have one bit which will be always different in user and 82 // kernel modes. And we have to remember that even though we have 83 // KernelFlagsClear, we still can see some of these flags in the kernel 84 // mode. This can happen when the goruntime switches on a goroutine 85 // which has been saved in the host mode. On restore, the popf 86 // instruction is used to restore flags and this means that all flags 87 // what the goroutine has in the host mode will be restored in the 88 // kernel mode. 89 // 90 // _RFLAGS_IOPL0 is never set in host and kernel modes and we always set 91 // it in the user mode. So if this flag is set, the task is running in 92 // the user mode and if it isn't set, the task is running in the kernel 93 // mode. 94 UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF | _RFLAGS_IOPL0 95 96 // KernelFlagsClear should always be clear in the kernel. 97 KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT 98 99 // UserFlagsClear are always cleared in userspace. 100 UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL1 101 ) 102 103 // IsKernelFlags returns true if rflags corresponds to the kernel mode. 104 // 105 //go:nosplit 106 func IsKernelFlags(rflags uint64) bool { 107 return rflags&_RFLAGS_IOPL0 == 0 108 } 109 110 // Vector is an exception vector. 111 type Vector uintptr 112 113 // Exception vectors. 114 const ( 115 DivideByZero Vector = iota 116 Debug 117 NMI 118 Breakpoint 119 Overflow 120 BoundRangeExceeded 121 InvalidOpcode 122 DeviceNotAvailable 123 DoubleFault 124 CoprocessorSegmentOverrun 125 InvalidTSS 126 SegmentNotPresent 127 StackSegmentFault 128 GeneralProtectionFault 129 PageFault 130 _ 131 X87FloatingPointException 132 AlignmentCheck 133 MachineCheck 134 SIMDFloatingPointException 135 VirtualizationException 136 SecurityException = 0x1e 137 SyscallInt80 = 0x80 138 _NR_INTERRUPTS = 0x100 139 ) 140 141 // System call vectors. 142 const ( 143 Syscall Vector = _NR_INTERRUPTS 144 ) 145 146 // Selector is a segment Selector. 147 type Selector uint16 148 149 // SegmentDescriptor is a segment descriptor. 150 type SegmentDescriptor struct { 151 bits [2]uint32 152 } 153 154 // descriptorTable is a collection of descriptors. 155 type descriptorTable [32]SegmentDescriptor 156 157 // SegmentDescriptorFlags are typed flags within a descriptor. 158 type SegmentDescriptorFlags uint32 159 160 // SegmentDescriptorFlag declarations. 161 const ( 162 SegmentDescriptorAccess SegmentDescriptorFlags = 1 << 8 // Access bit (always set). 163 SegmentDescriptorWrite = 1 << 9 // Write permission. 164 SegmentDescriptorExpandDown = 1 << 10 // Grows down, not used. 165 SegmentDescriptorExecute = 1 << 11 // Execute permission. 166 SegmentDescriptorSystem = 1 << 12 // Zero => system, 1 => user code/data. 167 SegmentDescriptorPresent = 1 << 15 // Present. 168 SegmentDescriptorAVL = 1 << 20 // Available. 169 SegmentDescriptorLong = 1 << 21 // Long mode. 170 SegmentDescriptorDB = 1 << 22 // 16 or 32-bit. 171 SegmentDescriptorG = 1 << 23 // Granularity: page or byte. 172 ) 173 174 // Base returns the descriptor's base linear address. 175 func (d *SegmentDescriptor) Base() uint32 { 176 return d.bits[1]&0xFF000000 | (d.bits[1]&0x000000FF)<<16 | d.bits[0]>>16 177 } 178 179 // Limit returns the descriptor size. 180 func (d *SegmentDescriptor) Limit() uint32 { 181 l := d.bits[0]&0xFFFF | d.bits[1]&0xF0000 182 if d.bits[1]&uint32(SegmentDescriptorG) != 0 { 183 l <<= 12 184 l |= 0xFFF 185 } 186 return l 187 } 188 189 // Flags returns descriptor flags. 190 func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags { 191 return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00) 192 } 193 194 // DPL returns the descriptor privilege level. 195 func (d *SegmentDescriptor) DPL() int { 196 return int((d.bits[1] >> 13) & 3) 197 } 198 199 func (d *SegmentDescriptor) setNull() { 200 d.bits[0] = 0 201 d.bits[1] = 0 202 } 203 204 func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) { 205 flags |= SegmentDescriptorPresent 206 if limit>>12 != 0 { 207 limit >>= 12 208 flags |= SegmentDescriptorG 209 } 210 d.bits[0] = base<<16 | limit&0xFFFF 211 d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13 212 } 213 214 func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) { 215 d.set(base, limit, dpl, 216 SegmentDescriptorDB| 217 SegmentDescriptorExecute| 218 SegmentDescriptorSystem) 219 } 220 221 func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) { 222 d.set(base, limit, dpl, 223 SegmentDescriptorG| 224 SegmentDescriptorLong| 225 SegmentDescriptorExecute| 226 SegmentDescriptorSystem) 227 } 228 229 func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) { 230 d.set(base, limit, dpl, 231 SegmentDescriptorWrite| 232 SegmentDescriptorSystem) 233 } 234 235 // setHi is only used for the TSS segment, which is magically 64-bits. 236 func (d *SegmentDescriptor) setHi(base uint32) { 237 d.bits[0] = base 238 d.bits[1] = 0 239 } 240 241 // Gate64 is a 64-bit task, trap, or interrupt gate. 242 type Gate64 struct { 243 bits [4]uint32 244 } 245 246 // idt64 is a 64-bit interrupt descriptor table. 247 type idt64 [_NR_INTERRUPTS]Gate64 248 249 func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) { 250 g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF 251 g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7 252 g.bits[2] = uint32(rip >> 32) 253 } 254 255 func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) { 256 g.setInterrupt(cs, rip, dpl, ist) 257 g.bits[1] |= 1 << 8 258 } 259 260 // TaskState64 is a 64-bit task state structure. 261 type TaskState64 struct { 262 _ uint32 263 rsp0Lo, rsp0Hi uint32 264 rsp1Lo, rsp1Hi uint32 265 rsp2Lo, rsp2Hi uint32 266 _ [2]uint32 267 ist1Lo, ist1Hi uint32 268 ist2Lo, ist2Hi uint32 269 ist3Lo, ist3Hi uint32 270 ist4Lo, ist4Hi uint32 271 ist5Lo, ist5Hi uint32 272 ist6Lo, ist6Hi uint32 273 ist7Lo, ist7Hi uint32 274 _ [2]uint32 275 _ uint16 276 ioPerm uint16 277 }