github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/ring0/x86.go (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // +build 386 amd64
    16  
    17  package ring0
    18  
    19  import (
    20  	"github.com/SagerNet/gvisor/pkg/cpuid"
    21  )
    22  
    23  // Useful bits.
    24  const (
    25  	_CR0_PE = 1 << 0
    26  	_CR0_ET = 1 << 4
    27  	_CR0_AM = 1 << 18
    28  	_CR0_PG = 1 << 31
    29  
    30  	_CR4_PSE        = 1 << 4
    31  	_CR4_PAE        = 1 << 5
    32  	_CR4_PGE        = 1 << 7
    33  	_CR4_OSFXSR     = 1 << 9
    34  	_CR4_OSXMMEXCPT = 1 << 10
    35  	_CR4_FSGSBASE   = 1 << 16
    36  	_CR4_PCIDE      = 1 << 17
    37  	_CR4_OSXSAVE    = 1 << 18
    38  	_CR4_SMEP       = 1 << 20
    39  
    40  	_RFLAGS_AC       = 1 << 18
    41  	_RFLAGS_NT       = 1 << 14
    42  	_RFLAGS_IOPL0    = 1 << 12
    43  	_RFLAGS_IOPL1    = 1 << 13
    44  	_RFLAGS_IOPL     = _RFLAGS_IOPL0 | _RFLAGS_IOPL1
    45  	_RFLAGS_DF       = 1 << 10
    46  	_RFLAGS_IF       = 1 << 9
    47  	_RFLAGS_STEP     = 1 << 8
    48  	_RFLAGS_RESERVED = 1 << 1
    49  
    50  	_EFER_SCE = 0x001
    51  	_EFER_LME = 0x100
    52  	_EFER_LMA = 0x400
    53  	_EFER_NX  = 0x800
    54  
    55  	_MSR_STAR          = 0xc0000081
    56  	_MSR_LSTAR         = 0xc0000082
    57  	_MSR_CSTAR         = 0xc0000083
    58  	_MSR_SYSCALL_MASK  = 0xc0000084
    59  	_MSR_PLATFORM_INFO = 0xce
    60  	_MSR_MISC_FEATURES = 0x140
    61  
    62  	_PLATFORM_INFO_CPUID_FAULT = 1 << 31
    63  
    64  	_MISC_FEATURE_CPUID_TRAP = 0x1
    65  )
    66  
    67  const (
    68  	// KernelFlagsSet should always be set in the kernel.
    69  	KernelFlagsSet = _RFLAGS_RESERVED
    70  
    71  	// UserFlagsSet are always set in userspace.
    72  	//
    73  	// _RFLAGS_IOPL is a set of two bits and it shows the I/O privilege
    74  	// level. The Current Privilege Level (CPL) of the task must be less
    75  	// than or equal to the IOPL in order for the task or program to access
    76  	// I/O ports.
    77  	//
    78  	// Here, _RFLAGS_IOPL0 is used only to determine whether the task is
    79  	// running in the kernel or userspace mode. In the user mode, the CPL is
    80  	// always 3 and it doesn't matter what IOPL is set if it is bellow CPL.
    81  	//
    82  	// We need to have one bit which will be always different in user and
    83  	// kernel modes. And we have to remember that even though we have
    84  	// KernelFlagsClear, we still can see some of these flags in the kernel
    85  	// mode. This can happen when the goruntime switches on a goroutine
    86  	// which has been saved in the host mode. On restore, the popf
    87  	// instruction is used to restore flags and this means that all flags
    88  	// what the goroutine has in the host mode will be restored in the
    89  	// kernel mode.
    90  	//
    91  	// _RFLAGS_IOPL0 is never set in host and kernel modes and we always set
    92  	// it in the user mode. So if this flag is set, the task is running in
    93  	// the user mode and if it isn't set, the task is running in the kernel
    94  	// mode.
    95  	UserFlagsSet = _RFLAGS_RESERVED | _RFLAGS_IF | _RFLAGS_IOPL0
    96  
    97  	// KernelFlagsClear should always be clear in the kernel.
    98  	KernelFlagsClear = _RFLAGS_STEP | _RFLAGS_IF | _RFLAGS_IOPL | _RFLAGS_AC | _RFLAGS_NT
    99  
   100  	// UserFlagsClear are always cleared in userspace.
   101  	UserFlagsClear = _RFLAGS_NT | _RFLAGS_IOPL1
   102  )
   103  
   104  // IsKernelFlags returns true if rflags coresponds to the kernel mode.
   105  //
   106  // go:nosplit
   107  func IsKernelFlags(rflags uint64) bool {
   108  	return rflags&_RFLAGS_IOPL0 == 0
   109  }
   110  
   111  // Vector is an exception vector.
   112  type Vector uintptr
   113  
   114  // Exception vectors.
   115  const (
   116  	DivideByZero Vector = iota
   117  	Debug
   118  	NMI
   119  	Breakpoint
   120  	Overflow
   121  	BoundRangeExceeded
   122  	InvalidOpcode
   123  	DeviceNotAvailable
   124  	DoubleFault
   125  	CoprocessorSegmentOverrun
   126  	InvalidTSS
   127  	SegmentNotPresent
   128  	StackSegmentFault
   129  	GeneralProtectionFault
   130  	PageFault
   131  	_
   132  	X87FloatingPointException
   133  	AlignmentCheck
   134  	MachineCheck
   135  	SIMDFloatingPointException
   136  	VirtualizationException
   137  	SecurityException = 0x1e
   138  	SyscallInt80      = 0x80
   139  	_NR_INTERRUPTS    = 0x100
   140  )
   141  
   142  // System call vectors.
   143  const (
   144  	Syscall Vector = _NR_INTERRUPTS
   145  )
   146  
   147  // VirtualAddressBits returns the number bits available for virtual addresses.
   148  //
   149  // Note that sign-extension semantics apply to the highest order bit.
   150  //
   151  // FIXME(b/69382326): This should use the cpuid passed to Init.
   152  func VirtualAddressBits() uint32 {
   153  	ax, _, _, _ := cpuid.HostID(0x80000008, 0)
   154  	return (ax >> 8) & 0xff
   155  }
   156  
   157  // PhysicalAddressBits returns the number of bits available for physical addresses.
   158  //
   159  // FIXME(b/69382326): This should use the cpuid passed to Init.
   160  func PhysicalAddressBits() uint32 {
   161  	ax, _, _, _ := cpuid.HostID(0x80000008, 0)
   162  	return ax & 0xff
   163  }
   164  
// Selector is a segment Selector.
//
// It is a 16-bit value; Gate64.setInterrupt stores one in the upper half of
// the gate's first word.
type Selector uint16
   167  
// SegmentDescriptor is a segment descriptor.
//
// The descriptor is stored as two 32-bit words, as written by set:
//   - bits[0]: base 15:0 in the upper half, limit 15:0 in the lower half.
//   - bits[1]: base 31:24 in the top byte, base 23:16 in the bottom byte,
//     limit 19:16 in bits 16-19, the DPL in bits 13-14 and the
//     SegmentDescriptorFlags in the remaining flag positions.
type SegmentDescriptor struct {
	bits [2]uint32
}
   172  
// descriptorTable is a collection of descriptors.
//
// It has a fixed capacity of 32 SegmentDescriptor entries.
type descriptorTable [32]SegmentDescriptor
   175  
   176  // SegmentDescriptorFlags are typed flags within a descriptor.
   177  type SegmentDescriptorFlags uint32
   178  
   179  // SegmentDescriptorFlag declarations.
   180  const (
   181  	SegmentDescriptorAccess     SegmentDescriptorFlags = 1 << 8  // Access bit (always set).
   182  	SegmentDescriptorWrite                             = 1 << 9  // Write permission.
   183  	SegmentDescriptorExpandDown                        = 1 << 10 // Grows down, not used.
   184  	SegmentDescriptorExecute                           = 1 << 11 // Execute permission.
   185  	SegmentDescriptorSystem                            = 1 << 12 // Zero => system, 1 => user code/data.
   186  	SegmentDescriptorPresent                           = 1 << 15 // Present.
   187  	SegmentDescriptorAVL                               = 1 << 20 // Available.
   188  	SegmentDescriptorLong                              = 1 << 21 // Long mode.
   189  	SegmentDescriptorDB                                = 1 << 22 // 16 or 32-bit.
   190  	SegmentDescriptorG                                 = 1 << 23 // Granularity: page or byte.
   191  )
   192  
   193  // Base returns the descriptor's base linear address.
   194  func (d *SegmentDescriptor) Base() uint32 {
   195  	return d.bits[1]&0xFF000000 | (d.bits[1]&0x000000FF)<<16 | d.bits[0]>>16
   196  }
   197  
   198  // Limit returns the descriptor size.
   199  func (d *SegmentDescriptor) Limit() uint32 {
   200  	l := d.bits[0]&0xFFFF | d.bits[1]&0xF0000
   201  	if d.bits[1]&uint32(SegmentDescriptorG) != 0 {
   202  		l <<= 12
   203  		l |= 0xFFF
   204  	}
   205  	return l
   206  }
   207  
   208  // Flags returns descriptor flags.
   209  func (d *SegmentDescriptor) Flags() SegmentDescriptorFlags {
   210  	return SegmentDescriptorFlags(d.bits[1] & 0x00F09F00)
   211  }
   212  
   213  // DPL returns the descriptor privilege level.
   214  func (d *SegmentDescriptor) DPL() int {
   215  	return int((d.bits[1] >> 13) & 3)
   216  }
   217  
   218  func (d *SegmentDescriptor) setNull() {
   219  	d.bits[0] = 0
   220  	d.bits[1] = 0
   221  }
   222  
   223  func (d *SegmentDescriptor) set(base, limit uint32, dpl int, flags SegmentDescriptorFlags) {
   224  	flags |= SegmentDescriptorPresent
   225  	if limit>>12 != 0 {
   226  		limit >>= 12
   227  		flags |= SegmentDescriptorG
   228  	}
   229  	d.bits[0] = base<<16 | limit&0xFFFF
   230  	d.bits[1] = base&0xFF000000 | (base>>16)&0xFF | limit&0x000F0000 | uint32(flags) | uint32(dpl)<<13
   231  }
   232  
   233  func (d *SegmentDescriptor) setCode32(base, limit uint32, dpl int) {
   234  	d.set(base, limit, dpl,
   235  		SegmentDescriptorDB|
   236  			SegmentDescriptorExecute|
   237  			SegmentDescriptorSystem)
   238  }
   239  
   240  func (d *SegmentDescriptor) setCode64(base, limit uint32, dpl int) {
   241  	d.set(base, limit, dpl,
   242  		SegmentDescriptorG|
   243  			SegmentDescriptorLong|
   244  			SegmentDescriptorExecute|
   245  			SegmentDescriptorSystem)
   246  }
   247  
   248  func (d *SegmentDescriptor) setData(base, limit uint32, dpl int) {
   249  	d.set(base, limit, dpl,
   250  		SegmentDescriptorWrite|
   251  			SegmentDescriptorSystem)
   252  }
   253  
   254  // setHi is only used for the TSS segment, which is magically 64-bits.
   255  func (d *SegmentDescriptor) setHi(base uint32) {
   256  	d.bits[0] = base
   257  	d.bits[1] = 0
   258  }
   259  
// Gate64 is a 64-bit task, trap, or interrupt gate.
//
// The gate is stored as four 32-bit words. As written by setInterrupt:
//   - bits[0]: code segment selector in the upper half, rip 15:0 in the lower
//     half.
//   - bits[1]: rip 31:16 in the upper half; present bit, DPL (bits 13-14),
//     gate type (bits 8-11) and IST index (bits 0-2) in the lower half.
//   - bits[2]: rip 63:32.
//   - bits[3]: not written by setInterrupt or setTrap.
type Gate64 struct {
	bits [4]uint32
}
   264  
// idt64 is a 64-bit interrupt descriptor table.
//
// It holds _NR_INTERRUPTS (0x100) gates; presumably the index is the
// interrupt vector number.
type idt64 [_NR_INTERRUPTS]Gate64
   267  
   268  func (g *Gate64) setInterrupt(cs Selector, rip uint64, dpl int, ist int) {
   269  	g.bits[0] = uint32(cs)<<16 | uint32(rip)&0xFFFF
   270  	g.bits[1] = uint32(rip)&0xFFFF0000 | SegmentDescriptorPresent | uint32(dpl)<<13 | 14<<8 | uint32(ist)&0x7
   271  	g.bits[2] = uint32(rip >> 32)
   272  }
   273  
   274  func (g *Gate64) setTrap(cs Selector, rip uint64, dpl int, ist int) {
   275  	g.setInterrupt(cs, rip, dpl, ist)
   276  	g.bits[1] |= 1 << 8
   277  }
   278  
   279  // TaskState64 is a 64-bit task state structure.
   280  type TaskState64 struct {
   281  	_              uint32
   282  	rsp0Lo, rsp0Hi uint32
   283  	rsp1Lo, rsp1Hi uint32
   284  	rsp2Lo, rsp2Hi uint32
   285  	_              [2]uint32
   286  	ist1Lo, ist1Hi uint32
   287  	ist2Lo, ist2Hi uint32
   288  	ist3Lo, ist3Hi uint32
   289  	ist4Lo, ist4Hi uint32
   290  	ist5Lo, ist5Hi uint32
   291  	ist6Lo, ist6Hi uint32
   292  	ist7Lo, ist7Hi uint32
   293  	_              [2]uint32
   294  	_              uint16
   295  	ioPerm         uint16
   296  }