gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/pkg/ring0/x86.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build 386 || amd64
    16  // +build 386 amd64
    17  
    18  package ring0
    19  
    20  // Useful bits.
    21  const (
    22  	_CR0_PE = 1 << 0
    23  	_CR0_ET = 1 << 4
    24  	_CR0_NE = 1 << 5
    25  	_CR0_AM = 1 << 18
    26  	_CR0_PG = 1 << 31
    27  
    28  	_CR4_PSE        = 1 << 4
    29  	_CR4_PAE        = 1 << 5
    30  	_CR4_PGE        = 1 << 7
    31  	_CR4_OSFXSR     = 1 << 9
    32  	_CR4_OSXMMEXCPT = 1 << 10
    33  	_CR4_FSGSBASE   = 1 << 16
    34  	_CR4_PCIDE      = 1 << 17
    35  	_CR4_OSXSAVE    = 1 << 18
    36  	_CR4_SMEP       = 1 << 20
    37  	_CR4_SMAP       = 1 << 21
    38  
    39  	_RFLAGS_AC       = 1 << 18
    40  	_RFLAGS_NT       = 1 << 14
    41  	_RFLAGS_IOPL0    = 1 << 12
    42  	_RFLAGS_IOPL1    = 1 << 13
    43  	_RFLAGS_IOPL     = _RFLAGS_IOPL0 | _RFLAGS_IOPL1
    44  	_RFLAGS_DF       = 1 << 10
    45  	_RFLAGS_IF       = 1 << 9
    46  	_RFLAGS_STEP     = 1 << 8
    47  	_RFLAGS_RESERVED = 1 << 1
    48  
    49  	_EFER_SCE = 0x001
    50  	_EFER_LME = 0x100
    51  	_EFER_LMA = 0x400
    52  	_EFER_NX  = 0x800
    53  
    54  	_MSR_STAR          = 0xc0000081
    55  	_MSR_LSTAR         = 0xc0000082
    56  	_MSR_CSTAR         = 0xc0000083
    57  	_MSR_SYSCALL_MASK  = 0xc0000084
    58  	_MSR_PLATFORM_INFO = 0xce
    59  	_MSR_MISC_FEATURES = 0x140
    60  
    61  	_PLATFORM_INFO_CPUID_FAULT = 1 << 31
    62  
    63  	_MISC_FEATURE_CPUID_TRAP = 0x1
    64  )
    65  
    66  const (
    67  	// KernelFlagsSet should always be set in the kernel.
    68  	KernelFlagsSet = _RFLAGS_RESERVED
    69  
    70  	// UserFlagsSet are always set in userspace.
    71  	//
    72  	// _RFLAGS_IOPL is a set of two bits and it shows the I/O privilege
    73  	// level. The Current Privilege Level (CPL) of the task must be less
    74  	// than or equal to the IOPL in order for the task or program to access
    75  	// I/O ports.
    76  	//
    77  	// Here, _RFLAGS_IOPL0 is used only to determine whether the task is
    78  	// running in the kernel or userspace mode. In the user mode, the CPL is
    79  	// always 3 and it doesn't matter what IOPL is set if it is below CPL.
    80  	//
    81  	// We need to have one bit which will be always different in user and
    82  	// kernel modes. And we have to remember that even though we have
    83  	// KernelFlagsClear, we still can see some of these flags in the kernel
    84  	// mode. This can happen when the goruntime switches on a goroutine
    85  	// which has been saved in the host mode. On restore, the popf
    86  	// instruction is used to restore flags and this means that all flags
    87  	// what the goroutine has in the host mode will be restored in the
    88  	// kernel mode.
    89  	//
    90  	// _RFLAGS_IOPL0 is never set in host and kernel modes and we always set
    91  	// it in the user mode. So if this flag is set, the task is running in
    92  	// the user mode and if it isn't set, the task is running in the kernel
    93  	// mode.
    94  	UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF | _RFLAGS_IOPL0
    95  
    96  	// KernelFlagsClear should always be clear in the kernel.
    97  	KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT
    98  
    99  	// UserFlagsClear are always cleared in userspace.
   100  	UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL1
   101  )
   102  
   103  // IsKernelFlags returns true if rflags corresponds to the kernel mode.
   104  //
   105  //go:nosplit
   106  func IsKernelFlags(rflags uint64) bool {
   107  	return rflags&_RFLAGS_IOPL0 == 0
   108  }
   109  
// Vector is an exception vector.
//
// The named values are declared in the constant blocks that follow this
// type: the exception vectors and the Syscall vector.
type Vector uintptr
   112  
// Exception vectors.
//
// DivideByZero through VirtualizationException are numbered consecutively by
// iota starting at 0, so their order in this block determines their values;
// do not reorder or insert entries.
const (
	DivideByZero Vector = iota
	Debug
	NMI
	Breakpoint
	Overflow
	BoundRangeExceeded
	InvalidOpcode
	DeviceNotAvailable
	DoubleFault
	CoprocessorSegmentOverrun
	InvalidTSS
	SegmentNotPresent
	StackSegmentFault
	GeneralProtectionFault
	PageFault
	// Vector 15 is left unnamed; the blank identifier consumes the iota
	// value so that X87FloatingPointException is 16.
	_
	X87FloatingPointException
	AlignmentCheck
	MachineCheck
	SIMDFloatingPointException
	VirtualizationException
	// The remaining constants give explicit values with no type, so they
	// are untyped integer constants rather than Vector values.
	// _NR_INTERRUPTS is used as the length of idt64.
	SecurityException = 0x1e
	SyscallInt80      = 0x80
	_NR_INTERRUPTS    = 0x100
)
   140  
   141  // System call vectors.
   142  const (
   143  	Syscall Vector = _NR_INTERRUPTS
   144  )
   145  
// Selector is a segment Selector.
//
// It is a 16-bit value; Gate64.setInterrupt stores it in the upper 16 bits of
// a gate's first word.
type Selector uint16
   148  
// SegmentDescriptor is a segment descriptor.
//
// The descriptor is kept as two 32-bit words, laid out as the accessor and
// setter methods encode them:
//
//   - bits[0]: limit[15:0] in bits 0-15, base[15:0] in bits 16-31.
//   - bits[1]: base[23:16] in bits 0-7, flags in bits 8-12 and 15, DPL in
//     bits 13-14, limit[19:16] in bits 16-19, flags in bits 20-23 and
//     base[31:24] in bits 24-31.
type SegmentDescriptor struct {
	bits [2]uint32
}
   153  
// descriptorTable is a collection of descriptors.
//
// It has a fixed capacity of 32 SegmentDescriptor entries.
type descriptorTable [32]SegmentDescriptor
   156  
// SegmentDescriptorFlags are typed flags within a descriptor.
//
// The flag values are bit masks over the second 32-bit word of a
// SegmentDescriptor; SegmentDescriptor.Flags extracts them from that word.
type SegmentDescriptorFlags uint32
   159  
// SegmentDescriptorFlag declarations.
//
// Each value is a bit mask within the second word of a descriptor.
//
// Only SegmentDescriptorAccess carries the SegmentDescriptorFlags type. The
// other constants give their own value without a type and are therefore
// untyped integer constants. Code in this file relies on that:
// SegmentDescriptorPresent is OR-ed directly with uint32 operands when
// building a Gate64, and with a SegmentDescriptorFlags value in
// SegmentDescriptor.set. Adding a type to these constants would break the
// former.
const (
	SegmentDescriptorAccess     SegmentDescriptorFlags = 1 << 8  // Access bit (always set).
	SegmentDescriptorWrite                             = 1 << 9  // Write permission.
	SegmentDescriptorExpandDown                        = 1 << 10 // Grows down, not used.
	SegmentDescriptorExecute                           = 1 << 11 // Execute permission.
	SegmentDescriptorSystem                            = 1 << 12 // Zero => system, 1 => user code/data.
	SegmentDescriptorPresent                           = 1 << 15 // Present.
	SegmentDescriptorAVL                               = 1 << 20 // Available.
	SegmentDescriptorLong                              = 1 << 21 // Long mode.
	SegmentDescriptorDB                                = 1 << 22 // 16 or 32-bit.
	SegmentDescriptorG                                 = 1 << 23 // Granularity: page or byte.
)
   173  
   174  // Base returns the descriptor's base linear address.
   175  func (d *SegmentDescriptor) Base() uint32 {
   176  	return d.bits[1]&0xFF000000 | (d.bits[1]&0x000000FF)<<16 | d.bits[0]>>16
   177  }
   178  
   179  // Limit returns the descriptor size.
   180  func (d *SegmentDescriptor) Limit() uint32 {
   181  	l := d.bits[0]&0xFFFF | d.bits[1]&0xF0000
   182  	if d.bits[1]&uint32(SegmentDescriptorG) != 0 {
   183  		l <<= 12
   184  		l |= 0xFFF
   185  	}
   186  	return l
   187  }
   188  
   189  // Flags returns descriptor flags.
   190  func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags {
   191  	return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00)
   192  }
   193  
   194  // DPL returns the descriptor privilege level.
   195  func (d *SegmentDescriptor) DPL() int {
   196  	return int((d.bits[1] >> 13) & 3)
   197  }
   198  
   199  func (d *SegmentDescriptor) setNull() {
   200  	d.bits[0] = 0
   201  	d.bits[1] = 0
   202  }
   203  
   204  func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) {
   205  	flags |= SegmentDescriptorPresent
   206  	if limit>>12 != 0 {
   207  		limit >>= 12
   208  		flags |= SegmentDescriptorG
   209  	}
   210  	d.bits[0] = base<<16 | limit&0xFFFF
   211  	d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13
   212  }
   213  
   214  func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) {
   215  	d.set(base, limit, dpl,
   216  		SegmentDescriptorDB|
   217  			SegmentDescriptorExecute|
   218  			SegmentDescriptorSystem)
   219  }
   220  
   221  func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) {
   222  	d.set(base, limit, dpl,
   223  		SegmentDescriptorG|
   224  			SegmentDescriptorLong|
   225  			SegmentDescriptorExecute|
   226  			SegmentDescriptorSystem)
   227  }
   228  
   229  func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) {
   230  	d.set(base, limit, dpl,
   231  		SegmentDescriptorWrite|
   232  			SegmentDescriptorSystem)
   233  }
   234  
   235  // setHi is only used for the TSS segment, which is magically 64-bits.
   236  func (d *SegmentDescriptor) setHi(base uint32) {
   237  	d.bits[0] = base
   238  	d.bits[1] = 0
   239  }
   240  
// Gate64 is a 64-bit task, trap, or interrupt gate.
//
// The gate is kept as four 32-bit words, laid out as setInterrupt encodes
// them:
//
//   - bits[0]: handler address bits 0-15 in bits 0-15, code segment selector
//     in bits 16-31.
//   - bits[1]: IST index in bits 0-2, gate type in bits 8-11, DPL in bits
//     13-14, present bit in bit 15, handler address bits 16-31 in bits 16-31.
//   - bits[2]: handler address bits 32-63.
//   - bits[3]: not written by the methods in this file.
type Gate64 struct {
	bits [4]uint32
}
   245  
// idt64 is a 64-bit interrupt descriptor table.
//
// It holds _NR_INTERRUPTS (0x100) gates, one per interrupt vector.
type idt64 [_NR_INTERRUPTS]Gate64
   248  
   249  func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) {
   250  	g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF
   251  	g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7
   252  	g.bits[2] = uint32(rip >> 32)
   253  }
   254  
   255  func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) {
   256  	g.setInterrupt(cs, rip, dpl, ist)
   257  	g.bits[1] |= 1 << 8
   258  }
   259  
// TaskState64 is a 64-bit task state structure.
//
// The field order is the memory layout, so fields must not be reordered.
// Every 64-bit quantity is split into a Lo and a Hi uint32 half. With only
// uint32 and uint16 fields the struct has no padding and is 104 bytes long;
// the byte offset of each group is noted below.
type TaskState64 struct {
	// Offset 0: unnamed padding word.
	_ uint32
	// Offsets 4, 12 and 20: rsp0, rsp1 and rsp2.
	rsp0Lo, rsp0Hi uint32
	rsp1Lo, rsp1Hi uint32
	rsp2Lo, rsp2Hi uint32
	// Offset 28: unnamed padding (8 bytes).
	_ [2]uint32
	// Offsets 36 through 84: ist1 to ist7, 8 bytes each.
	ist1Lo, ist1Hi uint32
	ist2Lo, ist2Hi uint32
	ist3Lo, ist3Hi uint32
	ist4Lo, ist4Hi uint32
	ist5Lo, ist5Hi uint32
	ist6Lo, ist6Hi uint32
	ist7Lo, ist7Hi uint32
	// Offset 92: unnamed padding (8 bytes), then 2 more bytes at offset 100.
	_ [2]uint32
	_ uint16
	// Offset 102. NOTE(review): presumably the offset of the I/O permission
	// bitmap; confirm against the code that fills it in.
	ioPerm uint16
}