github.com/jlmucb/cloudproxy@v0.0.0-20170830161738-b5aa0b619bc4/cpvmm/vmm/bootstrap/bootstrap_entry.c (about)

     1  /*
     2   * File: bootstrap_entry.c
     3   * Description: Get tbooted and boot 64 bit evmm
     4   * Author: John Manferdelli
     5   *
     6   * Copyright (c) 2013 Intel Corporation
     7   *
     8   * Licensed under the Apache License, Version 2.0 (the "License");
     9   * you may not use this file except in compliance with the License.
    10   * You may obtain a copy of the License at
    11   *           http://www.apache.org/licenses/LICENSE-2.0
    12   * Unless required by applicable law or agreed to in writing, software
    13   * distributed under the License is distributed on an "AS IS" BASIS,
    14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15   * See the License for the specific language governing permissions and
    16   * limitations under the License.
    17   */
    18  
    19  
    20  #include "bootstrap_types.h"
    21  #include "bootstrap_string.h"
    22  #include "bootstrap_print.h"
    23  #include "bootstrap_ia.h"
    24  
    25  #include "multiboot.h"
    26  #include "e820.h"
    27  #include "elf64.h"
    28  #include "linux_defns.h"
    29  #include "elf_defns.h"
    30  #include "tboot.h"
    31  
    32  #include "vmm_defs.h"
    33  #include "em64t_defs.h"
    34  #include "ia32_defs.h"
    35  #include "ia32_low_level.h"
    36  #include "x32_init64.h"
    37  #include "vmm_startup.h"
    38  
    39  
    40  // this is all 32 bit code
    41  
    42  #define JLMDEBUG
    43  
    44  #define MULTIAPS_ENABLED
    45  
    46  // JLM: Remove this soon 
    47  #ifdef TBOOT_SHARED_PAGE
    48  tboot_shared_t *shared_page = (tboot_shared_t *)0x829000;
    49  #else
    50  tboot_shared_t *shared_page = (tboot_shared_t *)NULL;
    51  #endif
    52  
    53  
    54  #define PSE_BIT     0x10
    55  #define PAE_BIT     0x20
    56  #define PAGE_SIZE   (1024 * 4) 
    57  #define PAGE_MASK   (~(PAGE_SIZE-1))
    58  #define PAGE_UP(a) ((a+(PAGE_SIZE-1))&PAGE_MASK)
    59  #define MAXPROCESSORS 64
    60  
    61  
    62  #define LOWBOOTPARAMS
    63  
    64  
    65  // -------------------------------------------------------------------------
    66  
    67  
    68  //   These are the variables for machine setup
    69  //       Consult evmm-init-notes.txt
    70  
    71  // start and end of bootstrap, no header, start is load address
    72  extern uint32_t _start_bootstrap, _end_bootstrap;
    73  
    74  
    75  #define EVMM_DEFAULT_START_ADDR  0x70000000 
    76  #define LINUX_DEFAULT_LOAD_ADDRESS 0x100000
    77  #define EVMM_HEAP_SIZE 0x100000
    78  #define EVMM_HEAP_BASE (EVMM_DEFAULT_START_ADDR- EVMM_HEAP_SIZE)
    79  #define UVMM_DEFAULT_HEAP 0x5000000
    80  
    81  
    82  //      Memory layout on start32_evmm entry
uint32_t tboot_start= 0;        // tboot image start address
uint32_t tboot_end= 0;          // tboot image end address
uint32_t bootstrap_start= 0;    // bootstrap image start address
uint32_t bootstrap_end= 0;      // bootstrap image end address
uint32_t evmm_start= 0;         // evmm image start address
uint32_t evmm_end= 0;           // evmm image end address
uint32_t linux_start= 0;        // linux image start address
uint32_t linux_end= 0;          // linux image end address
uint32_t initram_start= 0;      // initram image start address
uint32_t initram_end= 0;        // initram image end address
    93  
    94  
    95  // Post relocation addresses
    96  uint32_t evmm_start_address= 0;         // address of evmm after relocation
    97  uint32_t evmm_image_size= 0;            // image size
    98  uint32_t evmm_orig_mem_size= 0;         // original memsize
    99  uint32_t evmm_mem_size= 0;              // memory size (rounded up)
   100  uint32_t evmm_total_size= 0;            // total size (includes heap)
   101  uint32_t vmm_main_entry_point= 0;       // address of vmm_main after relocation
   102  uint32_t evmm_heap_base= 0;             // start of initial evmm heap
   103  uint32_t evmm_heap_current= 0; 
   104  uint32_t evmm_heap_top= 0;
   105  uint32_t evmm_heap_size= 0;             // size of initial evmm heap
   106  uint32_t evmm_bsp_stack_base= 0;        // low address where stack is allocated
   107  uint32_t evmm_bsp_stack= 0;             // initial bsp evmm stack
   108  uint32_t evmm_stack_pointers_array[MAXPROCESSORS];
   109  char*    evmm_command_line= NULL;       // evmm command line
   110  
   111  multiboot_info_t  linux_mbi;            // mbi for linux
   112  
   113  extern unsigned int max_e820_entries;   // copied e820 globals
   114  extern unsigned int g_nr_map;           // copied e820 globals
   115  extern memory_map_t *g_copy_e820_map;   // copied e820 globals
   116  memory_map_t bootstrap_e820[E820MAX];   // our e820 map
   117  
   118  // linux guest
   119  uint32_t linux_real_mode_start= 0;      // address of real mode
   120  uint32_t linux_real_mode_size= 0;       // size of real mode size
   121  uint32_t linux_protected_mode_start= 0; // address of protected mode
   122  uint32_t linux_protected_mode_size= 0;  // size of protected mode
   123  uint32_t linux_start_address= 0;        // start address of image
   124  uint32_t initram_start_address= 0;      // address of the initram 
   125  uint32_t linux_entry_address= 0;        // address of the eip guest entry
   126  uint32_t linux_edi_register= 0;         // edi register on guest entry
   127  uint32_t linux_esp_register= 0;         // esp on guest entry
   128  uint32_t linux_stack_base= 0;           // base of the stack on entry
   129  uint32_t linux_stack_size= 0;           // stack size on guest entry
   130  char*    linux_command_line= NULL;      // old command line
   131  char*    new_cmdline= NULL;             // new command line
   132  
   133  // boot parameters for linux guest
   134  uint32_t linux_original_boot_parameters= 0;
   135  uint32_t linux_boot_parameters= 0;
   136  
   137  
   138  // -------------------------------------------------------------------------
   139  
   140  
   141  //      initial evmm heap implementation
   142  
   143  
   144  void setup_evmm_heap(uint32_t heap_base_address, uint32_t heap_bytes)
   145  {
   146      evmm_heap_current = evmm_heap_base = heap_base_address;
   147      evmm_heap_top = evmm_heap_base + heap_bytes;
   148      evmm_heap_size= heap_bytes;
   149  }
   150  
   151  
   152  void *evmm_page_alloc(uint32_t pages)
   153  {
   154      uint32_t address;
   155      uint32_t size = pages * PAGE_SIZE;
   156  
   157      address = ALIGN_FORWARD(evmm_heap_current, PAGE_SIZE);
   158      evmm_heap_current = address + size;
   159      vmm_memset((void*)address, 0, size);
   160      return (void*)address;
   161  }
   162  
   163  
   164  // -------------------------------------------------------------------------
   165  
   166  
   167  //  Machine state and mode transition data structures
   168  #define TOTAL_MEM 0x100000000ULL
   169  
   170  static uint32_t                         evmm64_cs_selector= 0;
   171  static uint32_t                         evmm64_ds_selector= 0;
   172  
   173  static uint32_t                         evmm64_cr4= 0;
   174  static uint32_t                         evmm64_cr3 = 0;
   175  
   176  static IA32_GDTR                        gdtr_32;
   177  static IA32_GDTR                        gdtr_64;  // still in 32-bit mode
   178  
   179  // location of page in evmm heap that holds 64 bit descriptor table
   180  static uint32_t                         evmm_descriptor_table= 0;
   181  static EM64T_PML4 *                     pml4_table= NULL;
   182  static EM64T_PDPE *                     pdp_table= NULL;
   183  static EM64T_PDE_2MB *                  pd_table= NULL;
   184  
   185  static INIT64_STRUCT                    init64;
   186  static INIT32_STRUCT                    init32;
   187  
   188  uint32_t                                low_mem = 0x8000;
   189  int                                     evmm_num_of_aps= 0;
   190  static uint64_t                         evmm_reserved = 0;
   191  static uint32_t                         local_apic_id = 0;
   192  
   193  static IA32_GDTR                        tboot_gdtr_32;
   194  uint16_t                                tboot_cs_selector= 0;
   195  static uint32_t                         tboot_cs_base= 0;
   196  static uint16_t                         tboot_cs_limit= 0;
   197  static uint16_t                         tboot_cs_attr= 0;
   198  uint16_t                                tboot_ds_selector= 0;
   199  static uint32_t                         tboot_ds_base= 0;
   200  static uint16_t                         tboot_ds_limit= 0;
   201  static uint16_t                         tboot_ds_attr= 0;
   202  uint16_t                                tboot_ss_selector= 0;
   203  static uint32_t                         tboot_ss_base= 0;
   204  static uint16_t                         tboot_ss_limit= 0;
   205  static uint16_t                         tboot_ss_attr= 0;
   206  static uint64_t                         tboot_msr_debugctl= 0;
   207  static uint64_t                         tboot_msr_efer= 0;
   208  static uint64_t                         tboot_msr_pat= 0;
   209  static uint64_t                         tboot_msr_sysenter_cs= 0;
   210  static uint64_t                         tboot_msr_sysenter_esp= 0;
   211  static uint64_t                         tboot_msr_sysenter_eip= 0;
   212  static uint16_t                         tboot_tr_selector= 0;
   213  static uint32_t                         tboot_tr_base= 0;
   214  static uint16_t                         tboot_tr_limit= 0;
   215  static uint16_t                         tboot_tr_attr= 0;
   216  
   217  static uint32_t                         tboot_cr0= 0;
   218  static uint32_t                         tboot_cr2= 0;
   219  static uint32_t                         tboot_cr3= 0;
   220  static uint32_t                         tboot_cr4= 0;
   221  
   222  // guest gdt
   223  static IA32_GDTR __attribute__((aligned (16))) guest_gdtr;
   224  uint64_t __attribute__((aligned (16)))         guest_gdt[16] = {
   225      0x0000000000000000,
   226      0x0000000000000000,         // NULL descriptor 
   227      0x00c09b000000ffff,         // __KERNEL_CS 
   228      0x00c093000000ffff,         // __KERNEL_DS 
   229      0x00808b000000ffff,         // TS descriptor 
   230      0x0000000000000000,         // TS continued 
   231      0x0000000000000000,
   232      0x0000000000000000,
   233      0x0000000000000000,
   234      0x0000000000000000,
   235      0x0000000000000000,
   236      0x0000000000000000,
   237      0x0000000000000000,
   238      0x0000000000000000,
   239      0x0000000000000000,
   240      0x0000000000000000,
   241  };
   242  static uint16_t                         guest_cs_selector= 0;
   243  static uint32_t                         guest_cs_base= 0;
   244  static uint32_t                         guest_cs_limit= 0;
   245  static uint16_t                         guest_cs_attr= 0;
   246  static uint16_t                         guest_ds_selector= 0;
   247  static uint32_t                         guest_ds_base= 0;
   248  static uint32_t                         guest_ds_limit= 0;
   249  static uint16_t                         guest_ds_attr= 0;
   250  static uint16_t                         guest_ss_selector= 0;
   251  static uint32_t                         guest_ss_base= 0;
   252  static uint32_t                         guest_ss_limit= 0;
   253  static uint16_t                         guest_ss_attr= 0;
   254  static uint64_t                         guest_msr_debugctl= 0;
   255  static uint64_t                         guest_msr_efer= 0;
   256  static uint64_t                         guest_msr_pat= 0;
   257  static uint64_t                         guest_msr_sysenter_cs= 0;
   258  static uint64_t                         guest_msr_sysenter_esp= 0;
   259  static uint64_t                         guest_msr_sysenter_eip= 0;
   260  static uint16_t                         guest_tr_selector= 0;
   261  static uint32_t                         guest_tr_base= 0;
   262  static uint32_t                         guest_tr_limit= 0;
   263  static uint16_t                         guest_tr_attr= 0;
   264  
   265  static uint32_t                         guest_cr0= 0;
   266  static uint32_t                         guest_cr2= 0;
   267  static uint32_t                         guest_cr3= 0;
   268  static uint32_t                         guest_cr4= 0;
   269  
   270  
   271  // -------------------------------------------------------------------------
   272  
   273  
   274  // linux guest initialization definitions and globals
   275  
// Parameter block made of three 64-bit fields.  It is not referenced by
// the code visible in this part of the file.
// NOTE(review): the field meanings below are inferred from the names only
// -- confirm against the evmm entry point before relying on them.
typedef struct VMM_INPUT_PARAMS_S {
    uint64_t local_apic_id;         // presumably the local APIC id of the cpu
    uint64_t startup_struct;        // presumably address of a VMM_STARTUP_STRUCT
    uint64_t guest_params_struct;   // presumably address of the application params
} VMM_INPUT_PARAMS;
   281  
   282  VMM_GUEST_STARTUP                       evmm_g0;
   283  VMM_APPLICATION_PARAMS_STRUCT           evmm_a0;
   284  VMM_APPLICATION_PARAMS_STRUCT*          evmm_p_a0= &evmm_a0;
   285  
   286  // state of primary linux guest on startup
   287  VMM_GUEST_CPU_STARTUP_STATE      guest_processor_state[MAXPROCESSORS];
   288  VMM_STARTUP_STRUCT               startup_struct;
   289  VMM_STARTUP_STRUCT *             p_startup_struct = &startup_struct;
   290  
   291  #define PRIMARY_MAGIC 0x0
   292  
   293  // expanded e820 table used by evmm
   294  static unsigned int     evmm_num_e820_entries = 0;
   295  INT15_E820_MEMORY_MAP*  evmm_e820= NULL;
   296  uint64_t                evmm_start_of_e820_table= 0ULL; // 64 bits version
   297  
   298  static const cmdline_option_t linux_cmdline_options[] = {
   299      { "vga", "" },
   300      { "mem", "" },
   301      { NULL, NULL }
   302  };
   303  static char linux_param_values[ARRAY_SIZE(linux_cmdline_options)][MAX_VALUE_LEN];
   304  
   305  
   306  // -------------------------------------------------------------------------
   307  
   308  
   309  // machine setup and paging
   310  
   311  
   312  int ia32_get_selector(uint32_t* p, uint32_t* base, uint16_t* limit, uint16_t* access)
   313  {
   314      IA32_SEGMENT_DESCRIPTOR* desc= (IA32_SEGMENT_DESCRIPTOR*)p;
   315      IA32_SEGMENT_DESCRIPTOR_ATTR attr;
   316  
   317      *base= (UINT32)((desc->gen.lo.base_address_15_00) |
   318          (desc->gen.hi.base_address_23_16 << 16) |
   319          (desc->gen.hi.base_address_31_24 << 24));
   320      *limit= desc->gen.lo.limit_15_00; 
   321      if(desc->gen.hi.granularity)
   322          *limit = (*limit << 12) | 0x00000fff;
   323      attr.attr16 = desc->gen_attr.attributes;
   324      attr.bits.limit_19_16 = 0; 
   325      *access= (uint32_t) attr.attr16;
   326      return 0;
   327  }
   328  
   329  
   330  void ia32_write_ts(uint16_t ts)
   331  {
   332      __asm__ volatile(
   333          "\tmovw %[ts],%%ax\n"
   334          "\tltr  %%ax\n"
   335      ::[ts] "g" (ts)
   336      : "%ax");
   337  }
   338  
   339  
   340  uint16_t ia32_read_ts()
   341  {
   342      uint16_t  ts= 0;
   343  
   344      __asm__ volatile(
   345          "\txorw %%ax,%%ax\n"
   346          "\tstr  %%ax\n"
   347          "\tmovw %%ax, %[ts]\n"
   348      :[ts] "=g" (ts)
   349      : : "%ax");
   350      return ts;
   351  }
   352  
   353  
   354  uint16_t ia32_read_cs()
   355  {
   356      uint16_t  cs= 0;
   357  
   358      __asm__ volatile(
   359          "\txorw %%ax,%%ax\n"
   360          "\tmovw %%cs, %%ax\n"
   361          "\tmovw %%ax, %[cs]\n"
   362      :[cs] "=g" (cs)
   363      : : "%ax");
   364      return cs;
   365  }
   366  
   367  
   368  uint16_t ia32_read_ds()
   369  {
   370      uint16_t  ds= 0;
   371  
   372      __asm__ volatile(
   373          "\txorw %%ax,%%ax\n"
   374          "\tmovw %%ds,%%ax\n"
   375          "\tmovw %%ax, %[ds]\n"
   376      :[ds] "=g" (ds)
   377      : : "%ax");
   378      return ds;
   379  }
   380  
   381  
   382  uint16_t ia32_read_es()
   383  {
   384      uint16_t  es= 0;
   385  
   386      __asm__ volatile(
   387          "\txorw %%ax,%%ax\n"
   388          "\tmovw %%es,%%ax\n"
   389          "\tmovw %%ax, %[es]\n"
   390      :[es] "=g" (es)
   391      : : "%ax");
   392      return es;
   393  }
   394  
   395  
   396  uint16_t ia32_read_fs()
   397  {
   398      uint16_t  fs= 0;
   399  
   400      __asm__ volatile(
   401          "\txorw %%ax,%%ax\n"
   402          "\tmovw %%fs,%%ax\n"
   403          "\tmovw %%ax, %[fs]\n"
   404      :[fs] "=g" (fs)
   405      : : "%ax");
   406      return fs;
   407  }
   408  
   409  
   410  uint16_t ia32_read_gs()
   411  {
   412      uint16_t  gs= 0;
   413  
   414      __asm__ volatile(
   415          "\txorw %%ax,%%ax\n"
   416          "\tmovw %%gs,%%ax\n"
   417          "\tmovw %%ax, %[gs]\n"
   418      :[gs] "=g" (gs)
   419      : : "%ax");
   420      return gs;
   421  }
   422  
   423  
   424  uint16_t ia32_read_ss()
   425  {
   426      uint16_t  ss= 0;
   427  
   428      __asm__ volatile(
   429          "\txorw %%ax,%%ax\n"
   430          "\tmovw %%ss,%%ax\n"
   431          "\tmovw %%ax, %[ss]\n"
   432      :[ss] "=g" (ss)
   433      : : "%ax");
   434      return ss;
   435  }
   436  
   437  
   438  void read_cr0(uint32_t* ret)
   439  {
   440      __asm__ volatile(
   441          "\tmovl  %[ret],%%ebx\n"
   442          "\tmovl  %%cr0,%%eax\n"
   443          "\tmovl %%eax, (%%ebx)\n"
   444      ::[ret] "p" (ret) 
   445      : "%eax","%ebx");
   446  }
   447  
   448  
   449  void read_cr2(uint32_t* ret)
   450  {
   451      __asm__ volatile(
   452          "\tmovl  %[ret],%%ebx\n"
   453          "\tmovl  %%cr2,%%eax\n"
   454          "\tmovl %%eax, (%%ebx)\n"
   455      ::[ret] "p" (ret) 
   456      : "%eax","%ebx");
   457  }
   458  
   459  
   460  void read_cr3(uint32_t* ret)
   461  {
   462      __asm__ volatile(
   463          "\tmovl  %[ret],%%ebx\n"
   464          "\tmovl  %%cr3,%%eax\n"
   465          "\tmovl %%eax, (%%ebx)\n"
   466      ::[ret] "p" (ret) 
   467      : "%eax","%ebx");
   468  }
   469  
   470  
   471  void  write_cr3(uint32_t value)
   472  {
   473      __asm__ volatile(
   474          "\tmovl     %[value], %%eax\n"
   475          "\t movl    %%eax, %%cr3\n"
   476      ::[value] "m" (value)
   477      : "%eax", "cc");
   478  }
   479  
   480  
   481  void read_cr4(uint32_t* ret)
   482  {
   483      __asm__ volatile(
   484          "\tmovl  %[ret],%%ebx\n"
   485          "\tmovl  %%cr4,%%eax\n"
   486          "\tmovl %%eax, (%%ebx)\n"
   487      ::[ret] "r" (ret) 
   488      : "%eax","%ebx");
   489  }
   490  
   491  
   492  void write_cr4(uint32_t val)
   493  {
   494      __asm__ volatile(
   495          "\tmovl  %[val],%%eax\n"
   496          "\tmovl  %%eax, %%cr4\n"
   497      ::[val] "p" (val) 
   498      : "%eax","%ebx");
   499  }
   500  
   501  
   502  void  ia32_read_msr(uint32_t msr_id, uint64_t *p_value)
   503  {
   504      __asm__ volatile(
   505          "\tmovl %[msr_id], %%ecx\n"
   506          "\trdmsr\n"        //write from EDX:EAX into MSR[ECX]
   507          "\tmovl %[p_value], %%ecx\n"
   508          "\tmovl %%eax, (%%ecx)\n"
   509          "\tmovl %%edx, 4(%%ecx)\n"
   510      ::[msr_id] "g" (msr_id), [p_value] "p" (p_value)
   511      :"%eax", "%ecx", "%edx");
   512  }
   513  
   514  
   515  void  ia32_write_msr(uint32_t msr_id, uint64_t *p_value)
   516  {
   517      __asm__ volatile(
   518          "\tmovl %[p_value], %%ecx\n"
   519          "\tmovl (%%ecx), %%eax\n"
   520          "\tmovl 4(%%ecx), %%edx\n"
   521          "\tmovl %[msr_id], %%ecx\n"
   522          "\twrmsr"        //write from EDX:EAX into MSR[ECX]
   523      ::[msr_id] "g" (msr_id), [p_value] "p" (p_value)
   524      :"%eax", "%ecx", "%edx");
   525  }
   526  
   527  
   528  int setup_evmm_stacks()
   529  {
   530      // bsp stack
   531      evmm_bsp_stack_base= (uint32_t) 
   532                  evmm_page_alloc(UVMM_DEFAULT_STACK_SIZE_PAGES);
   533      if(evmm_bsp_stack_base==0) {
   534          return 1;
   535      }
   536      vmm_memset((void*)evmm_bsp_stack_base, 0, 
   537                 PAGE_4KB_SIZE*UVMM_DEFAULT_STACK_SIZE_PAGES);
   538      // stack grows down
   539      evmm_bsp_stack= evmm_bsp_stack_base+
   540                          PAGE_4KB_SIZE*UVMM_DEFAULT_STACK_SIZE_PAGES;
   541      evmm_stack_pointers_array[0]= evmm_bsp_stack;
   542  
   543      // ap stacks
   544      uint32_t evmm_ap_stack_base= 0;
   545      uint32_t evmm_ap_stack= 0;
   546      int i;
   547      for(i=0; i<evmm_num_of_aps;i++) {
   548          evmm_ap_stack_base= (uint32_t) 
   549                  evmm_page_alloc(UVMM_DEFAULT_STACK_SIZE_PAGES);
   550          vmm_memset((void*)evmm_ap_stack_base, 0, 
   551                     PAGE_4KB_SIZE*UVMM_DEFAULT_STACK_SIZE_PAGES);
   552          if(evmm_ap_stack_base==0) {
   553              return 1;
   554          }
   555          // stack grows down
   556          evmm_ap_stack= evmm_ap_stack_base+
   557                          PAGE_4KB_SIZE*UVMM_DEFAULT_STACK_SIZE_PAGES;
   558          evmm_stack_pointers_array[i+1]= evmm_ap_stack;
   559      }
   560      return 0;
   561  }
   562  
   563  
   564  void setup_64bit_descriptors(void)
   565  {
   566      // 1 page for segment descriptors
   567      evmm_descriptor_table = (uint32_t) evmm_page_alloc(1);
   568  
   569      // zero gdt
   570      vmm_memset((void*)evmm_descriptor_table, 0, PAGE_4KB_SIZE);
   571  
   572      // read 32-bit GDTR
   573      __asm__ volatile (
   574         "\tsgdt  %[gdtr_32]\n"
   575      :[gdtr_32] "=m" (gdtr_32)
   576      ::);
   577  
   578      // copy it to the new 64-bit GDT
   579      vmm_memcpy((void*)evmm_descriptor_table, (void *) gdtr_32.base, gdtr_32.limit+1);
   580  
   581      uint32_t  descriptor_base= ((uint32_t)evmm_descriptor_table+gdtr_32.limit+1);
   582  
   583      // 16 byte aligned
   584      descriptor_base= (descriptor_base+15)&(~0xf);
   585  
   586      uint64_t* end_of_desciptor_table= (uint64_t*) descriptor_base;
   587      // cs descriptor
   588      end_of_desciptor_table[0]= 0x00a09a0000000000ULL;
   589      end_of_desciptor_table[1]= 0x0000000000000000ULL;
   590      // ds descriptor
   591      end_of_desciptor_table[2]= 0x00a0920000000000ULL;
   592      end_of_desciptor_table[3]= 0x0000000000000000ULL;
   593  
   594      // selectors
   595      evmm64_cs_selector = (uint32_t)(&end_of_desciptor_table[0]) - 
   596                           (uint32_t)evmm_descriptor_table;
   597      evmm64_ds_selector = (uint32_t)(&end_of_desciptor_table[2]) - 
   598                           (uint32_t)evmm_descriptor_table;
   599  
   600      // set 64 bit
   601      gdtr_64.base= (uint32_t) evmm_descriptor_table;
   602      gdtr_64.limit= gdtr_32.limit + (uint32_t)(&end_of_desciptor_table[4])-
   603                      (uint32_t)(&end_of_desciptor_table[0]);
   604  
   605  #ifdef JLMDEBUG1
   606     bprint("\nevmm64_cs_selector: %d, evmm64_ds_selector: %d\n", 
   607            evmm64_cs_selector, evmm64_ds_selector);
   608     bprint("\ngdtr_32, base: 0x%08x, limit: %d\n", 
   609            gdtr_32.base, gdtr_32.limit);
   610     HexDump((uint8_t*)gdtr_32.base, (uint8_t*)gdtr_32.base+gdtr_32.limit+1);
   611     bprint("\ngdtr_64, base: 0x%08x, limit: %d\n", 
   612            gdtr_64.base, gdtr_64.limit);
   613     HexDump((uint8_t*)gdtr_64.base, (uint8_t*)gdtr_64.base+gdtr_64.limit+1);
   614     // LOOP_FOREVER
   615  #endif
   616      // load gdtr
   617      // ia32_write_gdtr(&gdtr_64);
   618      __asm__ volatile (
   619         "\tlgdt  %[gdtr_64]\n"
   620      :[gdtr_64] "=m" (gdtr_64)
   621      ::);
   622  }
   623  
   624  
   625  //     2MB pages while running in 32-bit mode.
   626  //     It should scope full 32-bit space, i.e. 4G
   627  void setup_64bit_paging(uint64_t memory_size)
   628  {
   629      uint32_t pdpt_entry_id;
   630      uint32_t pdt_entry_id;
   631      uint32_t address = 0;
   632  
   633      if (memory_size > TOTAL_MEM)
   634          memory_size = TOTAL_MEM;
   635  
   636      // To cover 4G-byte addrerss space the minimum set is
   637      // PML4    - 1entry
   638      // PDPT    - 4 entries
   639      // PDT     - 2048 entries
   640      pml4_table = (EM64T_PML4 *) evmm_page_alloc(1);
   641      vmm_memset(pml4_table, 0, PAGE_4KB_SIZE);
   642  
   643      pdp_table = (EM64T_PDPE *) evmm_page_alloc(1);
   644      vmm_memset(pdp_table, 0, PAGE_4KB_SIZE);
   645  
   646      // only one  entry is enough in PML4 table
   647      pml4_table[0].lo.base_address_lo = (uint32_t) pdp_table >> 12;
   648      pml4_table[0].lo.present = 1;
   649      pml4_table[0].lo.rw = 1;
   650      pml4_table[0].lo.us = 0;
   651      pml4_table[0].lo.pwt = 0;
   652      pml4_table[0].lo.pcd = 0;
   653      pml4_table[0].lo.accessed = 0;
   654      pml4_table[0].lo.ignored = 0;
   655      pml4_table[0].lo.zeroes = 0;
   656      pml4_table[0].lo.avl = 0;
   657  
   658      // 4  entries is enough in PDPT
   659      for (pdpt_entry_id = 0; pdpt_entry_id < 4; ++pdpt_entry_id) {
   660          pdp_table[pdpt_entry_id].lo.present = 1;
   661          pdp_table[pdpt_entry_id].lo.rw = 1;
   662          pdp_table[pdpt_entry_id].lo.us = 0;
   663          pdp_table[pdpt_entry_id].lo.pwt = 0;
   664          pdp_table[pdpt_entry_id].lo.pcd = 0;
   665          pdp_table[pdpt_entry_id].lo.accessed = 0;
   666          pdp_table[pdpt_entry_id].lo.ignored = 0;
   667          pdp_table[pdpt_entry_id].lo.zeroes = 0;
   668          pdp_table[pdpt_entry_id].lo.avl = 0;
   669  
   670          pd_table = (EM64T_PDE_2MB *) evmm_page_alloc(1);
   671          vmm_memset(pd_table, 0, PAGE_4KB_SIZE);
   672          pdp_table[pdpt_entry_id].lo.base_address_lo = (uint32_t) pd_table >> 12;
   673  
   674          for (pdt_entry_id = 0; pdt_entry_id < 512; 
   675                  ++pdt_entry_id, address += PAGE_2MB_SIZE) {
   676              pd_table[pdt_entry_id].lo.present = 1;
   677              pd_table[pdt_entry_id].lo.rw = 1;
   678              pd_table[pdt_entry_id].lo.us = 0;
   679              pd_table[pdt_entry_id].lo.pwt = 0;
   680              pd_table[pdt_entry_id].lo.pcd = 0;
   681              pd_table[pdt_entry_id].lo.accessed  = 0;
   682              pd_table[pdt_entry_id].lo.dirty = 0;
   683              pd_table[pdt_entry_id].lo.pse = 1;
   684              pd_table[pdt_entry_id].lo.global = 0;
   685              pd_table[pdt_entry_id].lo.avl = 0;
   686              pd_table[pdt_entry_id].lo.pat = 0;     //????
   687              pd_table[pdt_entry_id].lo.zeroes = 0;
   688              pd_table[pdt_entry_id].lo.base_address_lo = address >> 21;
   689          }
   690      }
   691      // FIX(JLM): add flags here to set caching
   692      evmm64_cr3= (((uint32_t) pml4_table) & 0xfffff000);
   693  }
   694  
   695  
   696  int setup_64bit()
   697  {
   698      // setup_64 bit for 64-bit on BSP
   699      setup_64bit_descriptors();
   700  
   701      // setup paging, control registers and flags on BSP
   702      setup_64bit_paging(TOTAL_MEM);
   703  
   704      // set cr3 and cr4
   705      write_cr3(evmm64_cr3);
   706      read_cr4(&evmm64_cr4);
   707      evmm64_cr4|= PAE_BIT|PSE_BIT;
   708      write_cr4(evmm64_cr4);
   709  
   710      // we don't really use this structure
   711      init64.i64_gdtr= gdtr_64;
   712      init64.i64_cr3= evmm64_cr3;  // note we dont use the structure
   713      init64.i64_cs = evmm64_cs_selector;
   714      init64.i64_efer = 0;
   715      return 0;
   716  }
   717  
   718  
   719  // -------------------------------------------------------------------------
   720  
   721  
   722  extern void startap_main(INIT32_STRUCT *p_init32, INIT64_STRUCT *p_init64,
   723                     VMM_STARTUP_STRUCT *p_startup, uint32_t entry_point);
   724  
   725  
// Switches the calling processor from 32-bit protected mode to 64-bit mode
// and jumps to start_address.  The asm ends in a jump (followed by ud2),
// so this function is not expected to return.
//
//   stack_pointer: only printed; the instruction that would load it into
//                  esp is commented out, so the current esp is used
//   start_address: target of the final jump (held in ebx)
//   segment:       listed as an asm operand but not referenced by the
//                  template; the far jump uses the literal selector 64
//   cpu_id:        first argument handed to the target (popped into edi)
//
// The target receives, in order: cpu_id, p_startup_struct, evmm_p_a0 and
// a reserved zero.
void start_64bit_mode_on_aps(uint32_t stack_pointer, uint32_t start_address, 
                uint32_t segment, uint32_t cpu_id)
{
    // local; shadows the file-level static of the same name
    uint32_t evmm_reserved= 0;
#ifdef JLMDEBUG
    bprint("start_64bit_mode_on_aps %u %u, cpu: %d\n", 
           stack_pointer, start_address, cpu_id);
#endif
    __asm__ volatile (

        "\tcli\n"

        // move start address to ebx for jump
        "\tmovl %[start_address], %%ebx\n"

        // initialize CR3 with PML4 base
        "\tmovl %[evmm64_cr3], %%eax\n"
        "\tmovl %%eax, %%cr3 \n"

        // the stack is already set; the mask 0xfffffff8 aligns esp down
        // to an 8 byte boundary
        // "movl   %[stack_pointer], %%esp\n"
        "\tandl  $0xfffffff8, %%esp\n"

        // prepare arguments for 64-bit mode
        // there are 4 arguments (including reserved)
        // each argument is pushed as a zero dword followed by the 32-bit
        // value, forming one 8 byte slot for the 64-bit pops below
        // NOTE(review): the operands use "g" constraints; if the compiler
        // addresses one relative to esp, the andl and pushes above would
        // shift it -- confirm the generated code
        "\txor  %%eax, %%eax\n"
        "\tpush %%eax\n"
        "\tpush %[evmm_reserved]\n"
        "\tpush %%eax\n"
        "\tpush %[evmm_p_a0]\n"
        "\tpush %%eax\n"
        "\tpush %[p_startup_struct]\n"
        "\tpush %%eax\n"
        "\tpush %[cpu_id]\n"

        // enable 64-bit mode
        // EFER MSR register
        "\tmovl $0x0c0000080, %%ecx\n"
        // read EFER into EAX
        "\trdmsr\n"

        // set EFER.LME=1
        "\tbts $8, %%eax\n"
        // set EFER.LMA=1 on BSP?
        "\tbts $10, %%eax\n"
        // write EFER
        "\twrmsr\n"

        // enable paging CR0.PG=1
        "\tmovl %%cr0, %%eax\n"
        "\tbts  $31, %%eax\n"
        "\tmovl %%eax, %%cr0\n"

        // at this point we are in 32-bit compatibility mode
        // LMA=1, CS.L=0, CS.D=1
        // jump from 32bit compatibility mode into 64bit mode.
        // mode switch
        // NOTE(review): selector 64 is hard-coded; presumably it must
        // equal evmm64_cs_selector -- confirm
        "ljmp   $64, $1f\n"

"1:\n"
        // in 64 bit this is actually pop rdi (cpu id)
        "\tpop %%edi\n"
        // in 64 bit this is actually pop rsi (startup)
        "\tpop %%esi\n"
        // in 64 bit this is actually pop rdx (app struct)
        "\tpop %%edx\n"
        // in 64 bit this is actually pop rcx (reserved)
        "\tpop %%ecx\n"

        // "\tjmp .\n"   // REMOVE!
        "\tjmp *%%ebx\n"
        "\tud2\n"
        :
        : [cpu_id] "g" (cpu_id), [p_startup_struct] "g" (p_startup_struct), 
          [evmm_p_a0] "g" (evmm_p_a0), [evmm_reserved] "g" (evmm_reserved), 
          [start_address] "g" (start_address), [segment] "g" (segment), 
          [stack_pointer] "m" (stack_pointer), [evmm64_cr3] "m" (evmm64_cr3)
        : "%eax", "%ebx", "%ecx", "%edx");
}
   805  
   806  
// Per-AP entry helper: snapshots the GDTR, loads CR3/CR4 from the globals
// evmm64_cr3/evmm64_cr4 and then calls start_64bit_mode_on_aps.
//   stack_pointer   forwarded unchanged to start_64bit_mode_on_aps
//   p_init64_data   only its i64_cs field is read here (passed on as the
//                   segment argument)
//   start_address   forwarded unchanged to start_64bit_mode_on_aps
//   cpu_id          forwarded unchanged to start_64bit_mode_on_aps
// The call to start_64bit_mode_on_aps is not expected to return; in JLMDEBUG
// builds a return is reported and the processor is parked in LOOP_FOREVER.
void init64_on_aps(uint32_t stack_pointer, INIT64_STRUCT *p_init64_data, 
                    uint32_t start_address, uint32_t cpu_id)
{
#ifdef JLMDEBUG
    bprint("init64_on_aps %u %u\n", stack_pointer, start_address);
#endif
    // ia32_write_gdtr(&p_init64_data->i64_gdtr);
    // sgdt *stores* this processor's current GDTR into the global gdtr_64;
    // it does not load a new GDT.
    // NOTE(review): the disabled call above would have loaded i64_gdtr
    // instead -- confirm that storing (and overwriting gdtr_64) is intended.
    __asm__ volatile (
       "\tsgdt  %[gdtr_64]\n"
    :[gdtr_64] "=m" (gdtr_64)
    ::);

    // page tables and CR4 feature bits must be in place before the mode switch
    write_cr3(evmm64_cr3);
    write_cr4(evmm64_cr4);
    start_64bit_mode_on_aps(stack_pointer, start_address, 
                   p_init64_data->i64_cs, cpu_id);
#ifdef JLMDEBUG
    // should never get here
    bprint("init64_on_aps done\n");
    LOOP_FOREVER
#endif
}
   829  
   830  
   831  // -------------------------------------------------------------------------
   832  
   833  
   834  // mbi and e820 support
   835  
   836  
   837  #ifdef JLMDEBUG
   838  void PrintMbi(const multiboot_info_t *mbi)
   839  {
   840      /* print mbi for debug */
   841      unsigned int i;
   842  
   843      bprint("print mbi@%p ...\n", mbi);
   844      bprint("\t flags: 0x%x\n", mbi->flags);
   845      if ( mbi->flags & MBI_MEMLIMITS )
   846          bprint("\t mem_lower: %uKB, mem_upper: %uKB\n", mbi->mem_lower,
   847                 mbi->mem_upper);
   848      if ( mbi->flags & MBI_BOOTDEV ) {
   849          bprint("\t boot_device.bios_driver: 0x%x\n",
   850                 mbi->boot_device.bios_driver);
   851          bprint("\t boot_device.top_level_partition: 0x%x\n",
   852                 mbi->boot_device.top_level_partition);
   853          bprint("\t boot_device.sub_partition: 0x%x\n",
   854                 mbi->boot_device.sub_partition);
   855          bprint("\t boot_device.third_partition: 0x%x\n",
   856                 mbi->boot_device.third_partition);
   857      }
   858      if ( mbi->flags & MBI_CMDLINE ) {
   859  #define CHUNK_SIZE 72 
   860  #if 0
   861          /* Break the command line up into 72 byte chunks */
   862          int   cmdlen = strlen((char*)mbi->cmdline);
   863          char *cmdptr = (char *)mbi->cmdline;
   864          char  chunk[CHUNK_SIZE+1];
   865          bprint("\t cmdline@0x%x: ", mbi->cmdline);
   866          chunk[CHUNK_SIZE] = '\0';
   867          while (cmdlen > 0) {
   868              vmm_strncpy(chunk, cmdptr, CHUNK_SIZE); 
   869              bprint("\n\t\"%s\"", chunk);
   870              cmdptr += CHUNK_SIZE;
   871              cmdlen -= CHUNK_SIZE;
   872          }
   873  #endif
   874          bprint("\n");
   875      }
   876      else {
   877          bprint("no command line\n");
   878      }
   879  
   880      if ( mbi->flags & MBI_MODULES ) {
   881          bprint("\t mods_count: %u, mods_addr: 0x%x\n", mbi->mods_count,
   882                 mbi->mods_addr);
   883          for ( i = 0; i < mbi->mods_count; i++ ) {
   884              module_t *p = (module_t *)(mbi->mods_addr + i*sizeof(module_t));
   885              bprint("\t     %d : mod_start: 0x%x, mod_end: 0x%x\n", i,
   886                     p->mod_start, p->mod_end);
   887              bprint("\t         string (@0x%x): \"%s\"\n", p->string,
   888                     (char *)p->string);
   889          }
   890      }
   891      if ( mbi->flags & MBI_AOUT ) {
   892          const aout_t *p = &(mbi->syms.aout_image);
   893          bprint("\t aout :: tabsize: 0x%x, strsize: 0x%x, addr: 0x%x\n",
   894                 p->tabsize, p->strsize, p->addr);
   895      }
   896      if ( mbi->flags & MBI_ELF ) {
   897          const elf_t *p = &(mbi->syms.elf_image);
   898          bprint("\t elf :: num: %u, size: 0x%x, addr: 0x%x, shndx: 0x%x\n",
   899                 p->num, p->size, p->addr, p->shndx);
   900      }
   901      if ( mbi->flags & MBI_MEMMAP ) {
   902          memory_map_t *p;
   903          bprint("\t mmap_length: 0x%x, mmap_addr: 0x%x\n", mbi->mmap_length,
   904                 mbi->mmap_addr);
   905          for ( p = (memory_map_t *)mbi->mmap_addr;
   906                (uint32_t)p < mbi->mmap_addr + mbi->mmap_length;
   907                p=(memory_map_t *)((uint32_t)p + p->size + sizeof(p->size)) ) {
   908                  bprint("\t     size: 0x%x, base_addr: 0x%04x%04x, "
   909                     "length: 0x%04x%04x, type: %u\n", p->size,
   910                     p->base_addr_high, p->base_addr_low,
   911                     p->length_high, p->length_low, p->type);
   912          }
   913      }
   914      if ( mbi->flags & MBI_DRIVES ) {
   915          bprint("\t drives_length: %u, drives_addr: 0x%x\n", mbi->drives_length,
   916                 mbi->drives_addr);
   917      }
   918      if ( mbi->flags & MBI_CONFIG ) {
   919          bprint("\t config_table: 0x%x\n", mbi->config_table);
   920      }
   921      if ( mbi->flags & MBI_BTLDNAME ) {
   922          bprint("\t boot_loader_name@0x%x: %s\n",
   923                 mbi->boot_loader_name, (char *)mbi->boot_loader_name);
   924      }
   925      if ( mbi->flags & MBI_APM ) {
   926          bprint("\t apm_table: 0x%x\n", mbi->apm_table);
   927      }
   928      if ( mbi->flags & MBI_VBE ) {
   929          bprint("\t vbe_control_info: 0x%x\n"
   930                 "\t vbe_mode_info: 0x%x\n"
   931                 "\t vbe_mode: 0x%x\n"
   932                 "\t vbe_interface_seg: 0x%x\n"
   933                 "\t vbe_interface_off: 0x%x\n"
   934                 "\t vbe_interface_len: 0x%x\n",
   935                 mbi->vbe_control_info,
   936                 mbi->vbe_mode_info,
   937                 mbi->vbe_mode,
   938                 mbi->vbe_interface_seg,
   939                 mbi->vbe_interface_off,
   940                 mbi->vbe_interface_len
   941                );
   942      }
   943  }
   944  #endif
   945  
   946  
   947  module_t *get_module(const multiboot_info_t *mbi, unsigned int i)
   948  {
   949      if ( mbi == NULL ) {
   950          bprint("Error: mbi pointer is zero.\n");
   951          return NULL;
   952      }
   953  
   954      if ( i >= mbi->mods_count ) {
   955          bprint("invalid module #\n");
   956          return NULL;
   957      }
   958  
   959      return (module_t *)(mbi->mods_addr + i * sizeof(module_t));
   960  }
   961  
   962  
   963  unsigned long max(unsigned long a, unsigned long b)
   964  {
   965      if(b>a)
   966          return b;
   967      return a;
   968  }
   969  
   970  
   971  unsigned long get_mbi_mem_end(const multiboot_info_t *mbi)
   972  {
   973      unsigned long end = (unsigned long)(mbi + 1);
   974  
   975      if ( mbi->flags & MBI_CMDLINE )
   976          end = max(end, mbi->cmdline + vmm_strlen((char *)mbi->cmdline) + 1);
   977      if ( mbi->flags & MBI_MODULES ) {
   978          end = max(end, mbi->mods_addr + mbi->mods_count * sizeof(module_t));
   979          unsigned int i;
   980          for ( i = 0; i < mbi->mods_count; i++ ) {
   981              module_t *p = get_module(mbi, i);
   982              end = max(end, p->string + vmm_strlen((char *)p->string) + 1);
   983          }
   984      }
   985      if ( mbi->flags & MBI_AOUT ) {
   986          const aout_t *p = &(mbi->syms.aout_image);
   987          end = max(end, p->addr + p->tabsize
   988                         + sizeof(unsigned long) + p->strsize);
   989      }
   990      if ( mbi->flags & MBI_ELF ) {
   991          const elf_t *p = &(mbi->syms.elf_image);
   992          end = max(end, p->addr + p->num * p->size);
   993      }
   994      if ( mbi->flags & MBI_MEMMAP )
   995          end = max(end, mbi->mmap_addr + mbi->mmap_length);
   996      if ( mbi->flags & MBI_DRIVES )
   997          end = max(end, mbi->drives_addr + mbi->drives_length);
   998      // mbi->config_table field should contain
   999      //  "the address of the rom configuration table returned by the
  1000      //  GET CONFIGURATION bios call", so skip it 
  1001      if ( mbi->flags & MBI_BTLDNAME )
  1002          end = max(end, mbi->boot_loader_name
  1003                         + vmm_strlen((char *)mbi->boot_loader_name) + 1);
  1004      if ( mbi->flags & MBI_APM )
  1005          end = max(end, mbi->apm_table + 20);
  1006      if ( mbi->flags & MBI_VBE ) {
  1007          end = max(end, mbi->vbe_control_info + 512);
  1008          end = max(end, mbi->vbe_mode_info + 256);
  1009      }
  1010  
  1011      return PAGE_UP(end);
  1012  }
  1013  
  1014  
  1015  // -------------------------------------------------------------------------
  1016  
  1017  
  1018  // linux memory initialization and guest setup
  1019  
  1020  
  1021  //  Linux data layout
  1022  //      Start of linux data area    <-- evmm_heap_base - 3*PAGE_SIZE
  1023  //          boot_parameters               <- offset 0
  1024  //          command line                  <- offset sizeof(boot_params_t)
  1025  //      4K stack                    <-- evmm_heap_base - PAGE_SIZE
  1026  //      evmm_heap_base              <-- where linux_esp_register will point
  1027  int allocate_linux_data()
  1028  {
  1029      // setup linux stack
  1030      linux_stack_base = evmm_heap_base-PAGE_SIZE;
  1031      linux_stack_size= PAGE_SIZE;
  1032      vmm_memset((void*)linux_stack_base, 0, PAGE_SIZE); 
  1033  
  1034      // linux data area
  1035  #ifdef LOWBOOTPARAMS
  1036      linux_boot_parameters= 0x20000;
  1037  #else
  1038      linux_boot_parameters= (linux_stack_base-linux_stack_size-2*PAGE_SIZE);
  1039  #endif
  1040      vmm_memset((void*)linux_boot_parameters, 0, 2*PAGE_SIZE); 
  1041      return 0;
  1042  }
  1043  
  1044  
// Fallback value, used only when no earlier header has already provided
// this constant.
#ifndef  Ia32VmxVmcsGuestSleepStateWaitForSipi
// defined in vmx_vmcs.h
#define  Ia32VmxVmcsGuestSleepStateWaitForSipi 3
#endif
  1049  
  1050  void setup_real_mode(VMM_GUEST_CPU_STARTUP_STATE *s)
  1051  {
  1052      s->size_of_this_struct = sizeof(VMM_GUEST_CPU_STARTUP_STATE);
  1053      s->version_of_this_struct = VMM_GUEST_CPU_STARTUP_STATE_VERSION;
  1054      s->msr.msr_sysenter_cs = tboot_msr_sysenter_cs;
  1055      s->msr.msr_sysenter_eip = tboot_msr_sysenter_eip;
  1056      s->msr.msr_sysenter_esp = tboot_msr_sysenter_esp;
  1057      s->msr.msr_efer = tboot_msr_efer;
  1058      s->msr.msr_pat = tboot_msr_pat;
  1059      s->msr.msr_debugctl = tboot_msr_debugctl;
  1060      s->msr.pending_exceptions = 0;
  1061      s->msr.interruptibility_state = 0;
  1062      s->msr.activity_state = 0;
  1063      s->msr.smbase = 0;
  1064      s->control.gdtr.base = (UINT64)0;
  1065      s->control.gdtr.limit = (UINT32)0xffff;
  1066      s->control.idtr.base = 0x0;
  1067      s->control.idtr.limit = 0xffff;
  1068      s->control.cr[IA32_CTRL_CR0] = 0;
  1069      s->control.cr[IA32_CTRL_CR2] = 0;
  1070      s->control.cr[IA32_CTRL_CR3] = 0;
  1071      s->control.cr[IA32_CTRL_CR4] = 0;
  1072      s->seg.segment[IA32_SEG_LDTR].attributes = 0x00010000;
  1073      s->seg.segment[IA32_SEG_CS].base = 0;
  1074      s->seg.segment[IA32_SEG_CS].limit = 0xffff;
  1075      s->seg.segment[IA32_SEG_CS].attributes = 0x93;
  1076      s->seg.segment[IA32_SEG_DS].base = 0;
  1077      s->seg.segment[IA32_SEG_DS].limit = 0xffff;
  1078      s->seg.segment[IA32_SEG_DS].attributes = 0x93;
  1079      s->seg.segment[IA32_SEG_ES].base = 0;
  1080      s->seg.segment[IA32_SEG_ES].limit = 0xffff;
  1081      s->seg.segment[IA32_SEG_ES].attributes = 0x93;
  1082      s->seg.segment[IA32_SEG_FS].base = 0;
  1083      s->seg.segment[IA32_SEG_FS].limit = 0xffff;
  1084      s->seg.segment[IA32_SEG_FS].attributes = 0x93;
  1085      s->seg.segment[IA32_SEG_GS].base = 0;
  1086      s->seg.segment[IA32_SEG_GS].limit = 0xffff;
  1087      s->seg.segment[IA32_SEG_GS].attributes = 0x93;
  1088      s->seg.segment[IA32_SEG_SS].base = 0;
  1089      s->seg.segment[IA32_SEG_SS].limit = 0xffff;
  1090      s->seg.segment[IA32_SEG_SS].attributes = 0x93;
  1091      s->gp.reg[IA32_REG_RDX] = 0x0080; // boot from hd0
  1092      s->gp.reg[IA32_REG_RIP] = 0x2000; // grub stage 1.5 buffer
  1093      s->gp.reg[IA32_REG_RSP] = 0x2000; // grub stage 1.5 buffer
  1094      s->gp.reg[IA32_REG_RFLAGS] &= 0xfffffdff;
  1095      return;
  1096  }
  1097  
  1098  
  1099  int linux_setup(void)
  1100  {
  1101      uint32_t        i;
  1102      IA32_IDTR       idtr;
  1103      // stack grows down, BSP only
  1104      linux_esp_register= linux_stack_base+PAGE_SIZE;
  1105  
  1106      // guest state from table
  1107      guest_gdtr.base= (uint32_t)&guest_gdt[0];
  1108      guest_gdtr.limit= 63;
  1109      guest_cs_selector= 0x10;
  1110      guest_cs_base= 0;
  1111      guest_cs_limit= 0xffffffff;
  1112      guest_cs_attr=  0xc09b;
  1113      guest_ds_selector= 0x18;
  1114      guest_ds_base= 0;
  1115      guest_ds_limit= 0xffffffff;
  1116      guest_ds_attr= 0xc093;
  1117      guest_ss_selector= 0x18;
  1118      guest_ss_base=  0;
  1119      guest_ss_limit=  0xffffffff;
  1120      guest_ss_attr= 0xc093;
  1121      guest_tr_selector= 0x20;
  1122      guest_tr_base= 0;
  1123      guest_tr_limit= 0x0000ffff;
  1124      guest_tr_attr= 0x0000808b;
  1125  
  1126      guest_msr_debugctl= tboot_msr_debugctl;
  1127      guest_msr_efer= tboot_msr_efer;
  1128      guest_msr_pat= tboot_msr_pat;
  1129      guest_msr_sysenter_cs= tboot_msr_sysenter_cs;
  1130      guest_msr_sysenter_esp= tboot_msr_sysenter_esp;
  1131      guest_msr_sysenter_eip= tboot_msr_sysenter_eip;
  1132  
  1133      guest_cr0= tboot_cr0;
  1134      guest_cr2= tboot_cr2;
  1135      guest_cr3= 0;
  1136      guest_cr4= tboot_cr4;
  1137  
  1138      // AP's are in real mode waiting for sipi (halt state)
  1139      // Set the VMCS GUEST ACTIVITY STATE in the VMCS Guest State Area
  1140      // to "halted."
  1141      // Bootstrap processor is 0
  1142      int k;
  1143      for(k=0; k<=evmm_num_of_aps; k++) {
  1144          if(k>0) {
  1145              // setup_real_mode(&guest_processor_state[k]);
  1146              continue;
  1147          }
  1148          guest_processor_state[k].size_of_this_struct = 
  1149                          sizeof(VMM_GUEST_CPU_STARTUP_STATE);
  1150          guest_processor_state[k].version_of_this_struct = 
  1151                          VMM_GUEST_CPU_STARTUP_STATE_VERSION;
  1152          guest_processor_state[k].reserved_1 = 0;
  1153  
  1154          // zero out all the registers.  Then set the ones that linux expects.
  1155          for (i = 0; i < IA32_REG_GP_COUNT; i++) {
  1156              guest_processor_state[k].gp.reg[i]= (uint64_t) 0;
  1157          }
  1158  
  1159          guest_processor_state[k].gp.reg[IA32_REG_RIP]= (uint64_t)linux_entry_address;
  1160          guest_processor_state[k].gp.reg[IA32_REG_RDI]= (uint64_t) linux_edi_register;
  1161          guest_processor_state[k].gp.reg[IA32_REG_RSI]= (uint64_t) linux_edi_register;
  1162          guest_processor_state[k].gp.reg[IA32_REG_RSP]= (uint64_t) 
  1163                          evmm_stack_pointers_array[k];
  1164          guest_processor_state[k].gp.reg[IA32_REG_RFLAGS] = 0x02;
  1165          guest_processor_state[k].gp.reg[IA32_REG_RBP] = 0;
  1166      
  1167          for (i = 0; i < IA32_REG_XMM_COUNT; i++) {
  1168              guest_processor_state[k].xmm.reg[i].uint64[0] = (uint64_t)0;
  1169              guest_processor_state[k].xmm.reg[i].uint64[1] = (uint64_t)0;
  1170          }
  1171          guest_processor_state[k].msr.msr_debugctl= guest_msr_debugctl;
  1172          guest_processor_state[k].msr.msr_efer= guest_msr_efer;
  1173          guest_processor_state[k].msr.msr_pat= guest_msr_pat;
  1174          guest_processor_state[k].msr.msr_sysenter_esp= guest_msr_sysenter_esp;
  1175          guest_processor_state[k].msr.msr_sysenter_eip= guest_msr_sysenter_eip;
  1176          guest_processor_state[k].msr.msr_sysenter_cs= guest_msr_sysenter_cs;
  1177          guest_processor_state[k].msr.pending_exceptions = 0;
  1178          guest_processor_state[k].msr.interruptibility_state = 0;
  1179          guest_processor_state[k].msr.activity_state = 0;
  1180          guest_processor_state[k].msr.smbase = 0;
  1181       
  1182          for (i = 0; i < IA32_CTRL_COUNT; i++) {
  1183              guest_processor_state[k].control.cr[i] = 0;
  1184          }
  1185          guest_processor_state[k].control.cr[IA32_CTRL_CR0]= (uint64_t) guest_cr0;
  1186          guest_processor_state[k].control.cr[IA32_CTRL_CR2]= (uint64_t) guest_cr2;
  1187          guest_processor_state[k].control.cr[IA32_CTRL_CR3] = (uint64_t) guest_cr3; 
  1188          guest_processor_state[k].control.cr[IA32_CTRL_CR4]= (uint64_t) guest_cr4;
  1189  
  1190          for (i = 0; i < IA32_SEG_COUNT; i++) {
  1191              guest_processor_state[k].seg.segment[i].base = 0;
  1192              guest_processor_state[k].seg.segment[i].limit = 0;
  1193          }
  1194     
  1195          guest_processor_state[k].control.gdtr.base = (uint64_t)(uint32_t)
  1196                          guest_gdtr.base;
  1197          guest_processor_state[k].control.gdtr.limit = (uint64_t)(uint32_t)
  1198                          guest_gdtr.limit;
  1199  
  1200          __asm__ volatile (
  1201             "\tsgdt  %[idtr]\n"
  1202          :[idtr] "=m" (idtr)
  1203          ::);
  1204  
  1205          guest_processor_state[k].control.idtr.base = (UINT64)idtr.base;
  1206          guest_processor_state[k].control.idtr.limit = (UINT32)idtr.limit;
  1207      
  1208          guest_processor_state[k].seg.segment[IA32_SEG_CS].selector = 
  1209                          (uint64_t) guest_cs_selector;
  1210          guest_processor_state[k].seg.segment[IA32_SEG_DS].selector = 
  1211                          (uint64_t) guest_ds_selector;
  1212          guest_processor_state[k].seg.segment[IA32_SEG_SS].selector = 
  1213                          (uint64_t) guest_ss_selector;
  1214          guest_processor_state[k].seg.segment[IA32_SEG_ES].selector = 
  1215                          (uint64_t) guest_ds_selector;
  1216          guest_processor_state[k].seg.segment[IA32_SEG_FS].selector = 
  1217                          (uint64_t) guest_ds_selector;
  1218          guest_processor_state[k].seg.segment[IA32_SEG_GS].selector = 
  1219                          (uint64_t) guest_ds_selector;
  1220          guest_processor_state[k].seg.segment[IA32_SEG_TR].selector= 
  1221                          (uint64_t) guest_tr_selector;
  1222  
  1223          guest_processor_state[k].seg.segment[IA32_SEG_CS].base = guest_cs_base;
  1224          guest_processor_state[k].seg.segment[IA32_SEG_DS].base = guest_ds_base;
  1225          guest_processor_state[k].seg.segment[IA32_SEG_SS].base = guest_ss_base;
  1226          guest_processor_state[k].seg.segment[IA32_SEG_ES].base = guest_ds_base;
  1227          guest_processor_state[k].seg.segment[IA32_SEG_FS].base = guest_ds_base;
  1228          guest_processor_state[k].seg.segment[IA32_SEG_GS].base = guest_ds_base;
  1229          guest_processor_state[k].seg.segment[IA32_SEG_TR].base = guest_ds_base;
  1230  
  1231          guest_processor_state[k].seg.segment[IA32_SEG_CS].limit = guest_cs_limit;
  1232          guest_processor_state[k].seg.segment[IA32_SEG_DS].limit = guest_ds_limit;
  1233          guest_processor_state[k].seg.segment[IA32_SEG_SS].limit = guest_ss_limit;
  1234          guest_processor_state[k].seg.segment[IA32_SEG_ES].limit = guest_ds_limit;
  1235          guest_processor_state[k].seg.segment[IA32_SEG_FS].limit = guest_ds_limit;
  1236          guest_processor_state[k].seg.segment[IA32_SEG_GS].limit = guest_ds_limit;
  1237          guest_processor_state[k].seg.segment[IA32_SEG_TR].limit = guest_tr_limit;
  1238  
  1239          guest_processor_state[k].seg.segment[IA32_SEG_CS].attributes = guest_cs_attr;
  1240          guest_processor_state[k].seg.segment[IA32_SEG_DS].attributes = guest_ds_attr;
  1241          guest_processor_state[k].seg.segment[IA32_SEG_SS].attributes = guest_ss_attr;
  1242          guest_processor_state[k].seg.segment[IA32_SEG_ES].attributes = guest_ds_attr;
  1243          guest_processor_state[k].seg.segment[IA32_SEG_FS].attributes = guest_ds_attr;
  1244          guest_processor_state[k].seg.segment[IA32_SEG_GS].attributes = guest_ds_attr;
  1245          guest_processor_state[k].seg.segment[IA32_SEG_TR].attributes = guest_tr_attr;
  1246      
  1247          guest_processor_state[k].seg.segment[IA32_SEG_LDTR].selector= 0x0;
  1248          guest_processor_state[k].seg.segment[IA32_SEG_LDTR].base= 0x0;
  1249          guest_processor_state[k].seg.segment[IA32_SEG_LDTR].limit= 0xffffffff;
  1250          guest_processor_state[k].seg.segment[IA32_SEG_LDTR].attributes= 0x00010000;
  1251      }
  1252      return 0;
  1253  }
  1254  
  1255  
  1256  // This builds the 24 byte extended e820 table
  1257  static uint64_t evmm_get_e820_table(const multiboot_info_t *mbi) 
  1258  {
  1259      uint32_t entry_offset = 0;
  1260      int i= 0;
  1261  
  1262      evmm_e820 = (INT15_E820_MEMORY_MAP *)evmm_page_alloc(1);
  1263      if (evmm_e820 == NULL)
  1264          return (uint64_t)-1;
  1265  
  1266      while ( entry_offset < mbi->mmap_length ) {
  1267          memory_map_t *entry = (memory_map_t *) 
  1268                      (mbi->mmap_addr + entry_offset);
  1269          evmm_e820->memory_map_entry[i].basic_entry.base_address = 
  1270                      (((uint64_t)entry->base_addr_high)<<32)+entry->base_addr_low;
  1271          evmm_e820->memory_map_entry[i].basic_entry.length = 
  1272                      (((uint64_t)entry->length_high)<<32)+entry->length_low;
  1273          evmm_e820->memory_map_entry[i].basic_entry.address_range_type= 
  1274                      entry->type;
  1275          evmm_e820->memory_map_entry[i].extended_attributes.uint32 = 1;
  1276          i++;
  1277         entry_offset += entry->size + sizeof(entry->size);
  1278      }
  1279      evmm_num_e820_entries = i;
  1280      evmm_e820->memory_map_size = i*sizeof(INT15_E820_MEMORY_MAP_ENTRY_EXT);
  1281      evmm_start_of_e820_table = (uint64_t)(uint32_t)evmm_e820;
  1282  
  1283      return evmm_start_of_e820_table;
  1284  }
  1285  
  1286  
// Parses cmdline with cmdline_parse, using the linux_cmdline_options table
// and storing the results in linux_param_values.
void linux_parse_cmdline(const char *cmdline)
{
    cmdline_parse(cmdline, linux_cmdline_options, linux_param_values);
}
  1291  
  1292  
  1293  int get_linux_vga(int *vid_mode)
  1294  {
  1295      const char *vga = get_option_val(linux_cmdline_options,
  1296                                       linux_param_values, "vga");
  1297      if ( vga == NULL || vid_mode == NULL )
  1298          return 1;
  1299      if ( vmm_strcmp(vga, "normal") == 0 )
  1300          *vid_mode = 0xFFFF;
  1301      else if ( vmm_strcmp(vga, "ext") == 0 )
  1302          *vid_mode = 0xFFFE;
  1303      else if ( vmm_strcmp(vga, "ask") == 0 )
  1304          *vid_mode = 0xFFFD;
  1305      else
  1306          *vid_mode = vmm_strtoul(vga, NULL, 0);
  1307      return 0;
  1308  }
  1309  
  1310  
  1311  unsigned long get_bootstrap_mem_end(void)
  1312  {
  1313      return PAGE_UP(bootstrap_end);
  1314  }
  1315  
  1316  
  1317  static inline bool plus_overflow_u32(uint32_t x, uint32_t y)
  1318  {
  1319      return ((((uint32_t)(~0)) - x) < y);
  1320  }
  1321  
  1322  
  1323  // expand linux kernel with kernel image and initrd image 
  1324  int expand_linux_image( multiboot_info_t* mbi,
  1325                          uint32_t linux_image, uint32_t linux_size,
  1326                          uint32_t initrd_image, uint32_t initrd_size,
  1327                          uint32_t* entry_point)
  1328  {
  1329      linux_kernel_header_t *hdr;
  1330      uint32_t real_mode_base, protected_mode_base;
  1331      unsigned long real_mode_size, protected_mode_size;
  1332      // Note: real_mode_size + protected_mode_size = linux_size 
  1333      uint32_t initrd_base;
  1334      int vid_mode = 0;
  1335      boot_params_t*  boot_params;
  1336  
  1337      // sanity check
  1338      if ( linux_image == 0) {
  1339          bprint("Error: Linux kernel image is zero.\n");
  1340          return 1;
  1341      }
  1342      if ( linux_size == 0 ) {
  1343          bprint("Error: Linux kernel size is zero.\n");
  1344          return 1;
  1345      }
  1346      
  1347      if ( linux_size < sizeof(linux_kernel_header_t) ) {
  1348          bprint("Error: Linux kernel size is too small.\n");
  1349          return 1;
  1350      }
  1351      hdr = (linux_kernel_header_t *)(linux_image + KERNEL_HEADER_OFFSET);
  1352      if ( hdr == NULL ) {
  1353          bprint("Error: Linux kernel header is zero.\n");
  1354          return 1;
  1355      }
  1356      if ( entry_point == NULL ) {
  1357          bprint("Error: Output pointer is zero.\n");
  1358          return 1;
  1359      }
  1360  
  1361      // recommended layout
  1362      //    0x0000 - 0x7FFF     Real mode kernel
  1363      //    0x8000 - 0x8FFF     Stack and heap
  1364      //    0x9000 - 0x90FF     Kernel command line
  1365  
  1366      // if setup_sects is zero, set to default value 4 
  1367      if ( hdr->setup_sects == 0 )
  1368          hdr->setup_sects = DEFAULT_SECTOR_NUM;
  1369      if ( hdr->setup_sects > MAX_SECTOR_NUM ) {
  1370          bprint("Error: Linux setup sectors %d exceed maximum limitation 64.\n",
  1371                  hdr->setup_sects);
  1372          return 1;
  1373      }
  1374  
  1375      // set vid_mode
  1376      linux_parse_cmdline(linux_command_line);
  1377      if (get_linux_vga(&vid_mode))
  1378          hdr->vid_mode = vid_mode;
  1379  
  1380      // compare to the magic number 
  1381      if ( hdr->header != HDRS_MAGIC ) {
  1382          bprint("Error: Old kernel (< 2.6.20) is not supported by tboot.\n");
  1383          return 1;
  1384      }
  1385      if ( hdr->version < 0x0205 ) {
  1386          bprint("Error: Old kernel (<2.6.20) is not supported by tboot.\n");
  1387          return 1;
  1388      }
  1389      // boot loader is grub, set type_of_loader to 0x7
  1390      hdr->type_of_loader = LOADER_TYPE_GRUB;
  1391  
  1392      // set loadflags and heap_end_ptr 
  1393      hdr->loadflags |= FLAG_CAN_USE_HEAP;         // can use heap
  1394      hdr->heap_end_ptr = KERNEL_CMDLINE_OFFSET - BOOT_SECTOR_OFFSET;
  1395  
  1396      // load initrd and set ramdisk_image and ramdisk_size 
  1397      // The initrd should typically be located as high in memory as
  1398      //   possible, as it may otherwise get overwritten by the early
  1399      //   kernel initialization sequence. 
  1400      uint64_t mem_limit = TOTAL_MEM;
  1401  
  1402      uint64_t max_ram_base, max_ram_size;
  1403      get_highest_sized_ram(initrd_size, mem_limit,
  1404                            &max_ram_base, &max_ram_size);
  1405      if ( max_ram_size == 0 ) {
  1406          bprint("not enough RAM for initrd\n");
  1407          return 1;
  1408      }
  1409      if ( initrd_size > max_ram_size ) {
  1410          bprint("initrd_size is too large\n");
  1411          return 1;
  1412      }
  1413      if ( max_ram_base > ((uint64_t)(uint32_t)(~0)) ) {
  1414          bprint("max_ram_base is too high\n");
  1415          return 1;
  1416      }
  1417      initrd_base = (max_ram_base + max_ram_size - initrd_size) & PAGE_MASK;
  1418  
  1419      // should not exceed initrd_addr_max 
  1420      if ( (initrd_base + initrd_size) > hdr->initrd_addr_max ) {
  1421          if ( hdr->initrd_addr_max < initrd_size ) {
  1422              bprint("initrd_addr_max is too small\n");
  1423              return 1;
  1424          }
  1425          initrd_base = hdr->initrd_addr_max - initrd_size;
  1426          initrd_base = initrd_base & PAGE_MASK;
  1427      }
  1428  
  1429      vmm_memcpy ((void *)initrd_base, (void*)initrd_image, initrd_size);
  1430  
  1431      hdr->ramdisk_image = initrd_base;
  1432      hdr->ramdisk_size = initrd_size;
  1433  
  1434      // calc location of real mode part 
  1435      // FIX (JLM) TBOOT defines
  1436      real_mode_base = LEGACY_REAL_START;
  1437      if ( mbi->flags & MBI_MEMLIMITS )
  1438          real_mode_base = (mbi->mem_lower << 10) - REAL_MODE_SIZE;
  1439      if ( real_mode_base < TBOOT_KERNEL_CMDLINE_ADDR +
  1440           TBOOT_KERNEL_CMDLINE_SIZE )
  1441          real_mode_base = TBOOT_KERNEL_CMDLINE_ADDR +
  1442              TBOOT_KERNEL_CMDLINE_SIZE;
  1443      if ( real_mode_base > LEGACY_REAL_START )
  1444          real_mode_base = LEGACY_REAL_START;
  1445      real_mode_size = (hdr->setup_sects + 1) * SECTOR_SIZE;
  1446      if ( real_mode_size + sizeof(boot_params_t) > KERNEL_CMDLINE_OFFSET ) {
  1447          bprint("realmode data is too large\n");
  1448          return 1;
  1449      }
  1450  
  1451      // calc location of protected mode part
  1452      protected_mode_size = linux_size - real_mode_size;
  1453  
  1454      // if kernel is relocatable then move it above tboot 
  1455      // else it may expand over top of tboot 
  1456      if ( hdr->relocatable_kernel ) {
  1457  #if 1
  1458          protected_mode_base =  0x02000000;
  1459          hdr->loadflags|= 0x40;  // keep segments
  1460  #else
  1461          bprint("relocatable kernel\n");
  1462          protected_mode_base = (uint32_t)get_bootstrap_mem_end();
  1463          /* fix possible mbi overwrite in grub2 case */
  1464          /* assuming grub2 only used for relocatable kernel */
  1465          /* assuming mbi & components are contiguous */
  1466          unsigned long mbi_end = get_mbi_mem_end(mbi);
  1467          if ( mbi_end > protected_mode_base )
  1468              protected_mode_base = mbi_end;
  1469          // overflow? 
  1470          if ( plus_overflow_u32(protected_mode_base,
  1471                   hdr->kernel_alignment - 1) ) {
  1472              bprint("protected_mode_base overflows\n");
  1473              return 1;
  1474          }
  1475          // round it up to kernel alignment
  1476          protected_mode_base= (protected_mode_base+hdr->kernel_alignment-1)
  1477                                & ~(hdr->kernel_alignment-1);
  1478  #endif
  1479          hdr->code32_start = protected_mode_base;
  1480      }
  1481      else if ( hdr->loadflags & FLAG_LOAD_HIGH ) {
  1482          protected_mode_base =  LINUX_DEFAULT_LOAD_ADDRESS; // bzImage:0x100000 
  1483          if ( plus_overflow_u32(protected_mode_base, protected_mode_size) ) {
  1484              bprint("protected_mode_base+protected_mode_size overflows\n");
  1485              return 1;
  1486          }
  1487          // Check: protected mode part cannot exceed mem_upper 
  1488          if ( mbi->flags & MBI_MEMLIMITS )
  1489              if ( (protected_mode_base + protected_mode_size)
  1490                      > ((mbi->mem_upper << 10) + 0x100000) ) {
  1491                  bprint("Error: Linux protected mode part (0x%lx ~ 0x%lx) "
  1492                         "exceeds mem_upper (0x%lx ~ 0x%lx).\n",
  1493                         (unsigned long)protected_mode_base,
  1494                         (unsigned long)(protected_mode_base + protected_mode_size),
  1495                         (unsigned long)0x100000,
  1496                         (unsigned long)((mbi->mem_upper << 10) + 0x100000));
  1497                  return 1;
  1498              }
  1499      }
  1500      else {
  1501          bprint("Error: Linux protected mode not loaded high\n");
  1502          return 1;
  1503      }
  1504  
  1505      // set cmd_line_ptr 
  1506      hdr->cmd_line_ptr = real_mode_base + KERNEL_CMDLINE_OFFSET;
  1507  
  1508      // load protected-mode part 
  1509      vmm_memcpy((void *)protected_mode_base, 
  1510                 (void*)(linux_image + real_mode_size),
  1511                 protected_mode_size);
  1512  #ifdef JLMDEBUG1
  1513      bprint("Kernel (protected mode) from 0x%lx to 0x%lx\n",
  1514             (unsigned long)protected_mode_base,
  1515             (unsigned long)(protected_mode_base + protected_mode_size));
  1516  #endif
  1517  
  1518      // load real-mode part 
  1519      vmm_memcpy((void *)real_mode_base, (void*)linux_image, real_mode_size);
  1520  #ifdef JLMDEBUG1
  1521      bprint("Kernel (real mode) from 0x%lx to 0x%lx\n",
  1522             (unsigned long)real_mode_base,
  1523             (unsigned long)(real_mode_base + real_mode_size));
  1524  #endif
  1525  
  1526      // copy cmdline 
  1527      if ( mbi->flags & MBI_CMDLINE ) {
  1528          const char *kernel_cmdline = skip_filename((const char *)mbi->cmdline);
  1529          vmm_memcpy((void *)hdr->cmd_line_ptr, kernel_cmdline, 
  1530                 vmm_strlen((const char*)kernel_cmdline));
  1531      }
  1532  
  1533      // need to put boot_params in real mode area so it gets mapped 
  1534      boot_params = (boot_params_t *)(real_mode_base + real_mode_size);
  1535      vmm_memset(boot_params, 0, sizeof(*boot_params));
  1536      vmm_memcpy(&boot_params->hdr, hdr, sizeof(*hdr));
  1537  
  1538      // copy e820 table to boot parameters
  1539      if ( mbi->flags & MBI_MEMMAP ) {
  1540          int i;
  1541  
  1542          memory_map_t *p = (memory_map_t *)mbi->mmap_addr;
  1543          for ( i = 0; (uint32_t)p < mbi->mmap_addr + mbi->mmap_length; i++ ) {
  1544              boot_params->e820_map[i].addr = ((uint64_t)p->base_addr_high << 32)
  1545                                              | (uint64_t)p->base_addr_low;
  1546              boot_params->e820_map[i].size = ((uint64_t)p->length_high << 32)
  1547                                              | (uint64_t)p->length_low;
  1548              boot_params->e820_map[i].type = p->type;
  1549              p = (void *)p + p->size + sizeof(p->size);
  1550          }
  1551          boot_params->e820_entries = i;
  1552      }
  1553  
  1554      screen_info_t *screen = (screen_info_t *)&boot_params->screen_info;
  1555      screen->orig_video_mode = 3;       // BIOS 80*25 text mode
  1556      screen->orig_video_lines = 25;
  1557      screen->orig_video_cols = 80;
  1558      screen->orig_video_points = 16;    // set font height to 16 pixels
  1559      screen->orig_video_isVGA = 1;      // use VGA text screen setups 
  1560      screen->orig_y = 24;               // last line of screen 
  1561      linux_original_boot_parameters= (uint32_t) boot_params;
  1562      linux_real_mode_start= real_mode_base;
  1563      linux_real_mode_size= real_mode_size;
  1564      linux_protected_mode_start= protected_mode_base;
  1565      linux_protected_mode_size= protected_mode_size;
  1566      initram_start_address= initrd_base;
  1567      *entry_point = hdr->code32_start;
  1568      return 0;
  1569  }
  1570  
  1571  
  1572  #ifdef INCLUDETBOOTCMDLINE
  1573  int adjust_kernel_cmdline(multiboot_info_t *mbi,
  1574                            const void *tboot_shared_addr)
  1575  {
  1576      const char *old_cmdline;
  1577  
  1578      if ( mbi->flags & MBI_CMDLINE && mbi->cmdline != 0 )
  1579          old_cmdline = (const char *)mbi->cmdline;
  1580      else
  1581          old_cmdline = "";
  1582  
  1583      vscnprintf(new_cmdline, TBOOT_KERNEL_CMDLINE_SIZE, "%s tboot=%p",
  1584               old_cmdline, tboot_shared_addr);
  1585      new_cmdline[TBOOT_KERNEL_CMDLINE_SIZE - 1] = '\0';
  1586  
  1587      mbi->cmdline = (u32)new_cmdline;
  1588      mbi->flags |= MBI_CMDLINE;
  1589  
  1590      return 0;
  1591  }
  1592  #endif
  1593  
  1594  
  1595  // relocate and setup variables for evmm entry
  1596  int prepare_primary_guest_args(multiboot_info_t *mbi)
  1597  {
  1598    (void)mbi;
  1599      if(linux_original_boot_parameters==0) {
  1600          bprint("original boot parameters not set\n");
  1601          return 1;
  1602      }
  1603      if(linux_boot_parameters==0) {
  1604        bprint("linux boot parameter area fails\n");
  1605        LOOP_FOREVER
  1606      }
  1607      vmm_memcpy((void*)linux_boot_parameters, (void*)linux_original_boot_parameters,
  1608                 sizeof(boot_params_t));
  1609      boot_params_t* new_boot_params= (boot_params_t*)linux_boot_parameters;
  1610      new_boot_params->e820_entries= g_nr_map;
  1611  
  1612      // set address of copied tboot shared page 
  1613  #ifdef TBOOT_SHARED_PAGE
  1614      *(uint64_t *)(new_boot_params->tboot_shared_addr)= (uint32_t)shared_page;
  1615  #else
  1616      *(uint64_t *)(new_boot_params->tboot_shared_addr)= 0;
  1617  #endif
  1618  
  1619      // copy command line after boot parameters
  1620      new_cmdline= (char*)(linux_boot_parameters+sizeof(boot_params_t));
  1621  #ifdef INCLUDETBOOTCMDLINE
  1622      if(adjust_kernel_cmdline(mbi, (const void*)new_boot_params->tboot_shared_addr)!=0) {
  1623        bprint("cant adjust linux command line\n");
  1624        LOOP_FOREVER
  1625      }
  1626  #else
  1627      if(linux_command_line!=0) {
  1628          vmm_memcpy((void*)new_cmdline, (void*) linux_command_line,
  1629                 vmm_strlen((char*)linux_command_line)+1);
  1630          new_boot_params->hdr.cmdline_size= vmm_strlen(new_cmdline)+1;
  1631          new_boot_params->hdr.cmd_line_ptr= (uint32_t) new_cmdline;
  1632      }
  1633      else {
  1634          new_cmdline= NULL;
  1635          new_boot_params->hdr.cmdline_size= 0;
  1636          new_boot_params->hdr.cmd_line_ptr= (uint32_t) 0;
  1637      }
  1638  #endif
  1639      // set edi register
  1640      linux_edi_register= linux_boot_parameters;
  1641  #ifdef JLMDEBUG1  // TEST1
  1642      bprint("edi reg: %08x\n", linux_edi_register);
  1643      bprint("cmd_ptr: %08x, code32_start: %08x\n", new_boot_params->hdr.cmd_line_ptr, 
  1644  new_boot_params->hdr.code32_start);
  1645      LOOP_FOREVER
  1646  #endif
  1647      return 0;
  1648  }
  1649  
  1650  
  1651  int prepare_linux_image_for_evmm(multiboot_info_t *mbi)
  1652  {
  1653      if (linux_start==0 || initram_start==0) {
  1654          bprint("bad linux image or initram image\n");
  1655          return 1;
  1656      }
  1657  
  1658      // make linux mbi 
  1659      // get correct command line and correct mbi
  1660      vmm_memset((void*) &linux_mbi, 0, sizeof(multiboot_info_t));
  1661      vmm_memcpy((void*) &linux_mbi, (void*)mbi, sizeof(multiboot_info_t));
  1662  #ifdef JLMDEBUG1
  1663      if ( mbi->flags & MBI_CMDLINE ) {
  1664          bprint("copied mbi has a command line\n");
  1665      }
  1666      else {
  1667          bprint("copied mbi does not have a command line\n");
  1668      }
  1669      bprint("original mbi, %d modules, size %d\n", 
  1670             mbi->mods_count, sizeof(multiboot_info_t));
  1671  #endif
  1672      if(linux_command_line!=0) {
  1673          mbi->flags|= MBI_CMDLINE;
  1674          mbi->cmdline= (uint32_t)linux_command_line;
  1675      }
  1676      linux_mbi.mods_count--;
  1677      linux_mbi.mods_addr+= sizeof(module_t);
  1678      linux_mbi.mmap_addr= (uint32_t)g_copy_e820_map;
  1679      linux_mbi.mmap_length= g_nr_map*sizeof(memory_map_t);
  1680  
  1681  #ifdef JLMDEBUG1
  1682      bprint("linux mbi, %d modules\n", linux_mbi.mods_count);
  1683  #endif
  1684  
  1685      if(expand_linux_image(&linux_mbi, linux_start, linux_end-linux_start,
  1686                         initram_start, initram_end-initram_start, 
  1687                         &linux_entry_address)!=0) {
  1688          bprint("cannot expand linux image\n");
  1689          return 1;
  1690      }
  1691  #ifdef JLMDEBUG1
  1692      bprint("linux_real_mode_start, linux_real_mode_size: 0x%08x %d\n",
  1693              linux_real_mode_start, linux_real_mode_size);
  1694      bprint("linux_protected_mode_start, linux_protected_mode_size: 0x%08x %d\n",
  1695              linux_protected_mode_start, linux_protected_mode_size);
  1696      bprint("initram_start_address: 0x%08x\n", initram_start_address);
  1697  #endif
  1698      if(prepare_primary_guest_args(&linux_mbi)!=0) {
  1699          bprint("cannot prepare_primary_guest_args\n");
  1700          return 1;
  1701      }
  1702      // CHECK
  1703      linux_start_address= linux_protected_mode_start;
  1704  
  1705  #ifdef JLMDEBUG1
  1706      // print header
  1707      linux_kernel_header_t* hdr = (linux_kernel_header_t*)
  1708                  (linux_start + KERNEL_HEADER_OFFSET);
  1709      bprint("linux header\n");
  1710      bprint("setup_sects 0x%02x, code32_start 0x%08x\n", 
  1711          hdr->setup_sects, hdr->code32_start);
  1712      bprint("payload offset 0x%08x, payload length 0x%08x\n",
  1713          hdr->payload_offset, hdr->payload_length);
  1714      bprint("heap_end_ptr 0x%08x, command line: 0x%08x\n", 
  1715          hdr->heap_end_ptr, hdr->cmd_line_ptr);
  1716      bprint("ramdisk image 0x%08x, ramdisk size %d\n", 
  1717          hdr->ramdisk_image, hdr->ramdisk_size);
  1718  #endif
  1719      return 0;
  1720  }
  1721  
  1722  
// Value stored in evmm_g0.guest_magic_number by
// prepare_primary_guest_environment().
#define MIN_ANONYMOUS_GUEST_ID  30000
// Single-bit guest flags.  prepare_primary_guest_environment() stores
// GUEST_IS_PRIMARY_FLAG | GUEST_IS_DEFAULT_DEVICE_OWNER_FLAG in
// evmm_g0.flags.
// NOTE(review): the other flags are not referenced in the part of the
// file reviewed here; confirm whether they are still needed.
#define GUEST_IS_PRIMARY_FLAG 1
#define GUEST_IS_NMI_OWNER_FLAG 2
#define GUEST_IS_ACPI_OWNER_FLAG 4
#define GUEST_IS_DEFAULT_DEVICE_OWNER_FLAG 8
#define GUEST_BIOS_ACCESS_ENABLED_FLAG 16
#define GUEST_SAVED_IMAGE_IS_COMPRESSED_FLAG 32
  1730  
  1731  
  1732  int prepare_primary_guest_environment(const multiboot_info_t *mbi)
  1733  {
  1734      // setup stack ,control and gp registers for VMCS to init guest
  1735      // Guest wakes up in 32 bit protected mode with arguments in edi
  1736      linux_setup(); 
  1737  
  1738      // Guest state initialization for relocated image
  1739      evmm_g0.size_of_this_struct = sizeof(VMM_GUEST_STARTUP);
  1740      evmm_g0.version_of_this_struct = VMM_GUEST_STARTUP_VERSION;
  1741      evmm_g0.flags = 0;
  1742      BITMAP_SET(evmm_g0.flags, VMM_GUEST_FLAG_LAUNCH_IMMEDIATELY);
  1743      evmm_g0.flags= GUEST_IS_PRIMARY_FLAG | GUEST_IS_DEFAULT_DEVICE_OWNER_FLAG;
  1744      evmm_g0.guest_magic_number = MIN_ANONYMOUS_GUEST_ID;
  1745      evmm_g0.cpu_affinity = -1;
  1746  #if 0
  1747      evmm_g0.cpu_states_count = 1+evmm_num_of_aps;
  1748  #else
  1749      evmm_g0.cpu_states_count = 1;
  1750  #endif
  1751      evmm_g0.devices_count = 0;
  1752      evmm_g0.image_size = 0; // linux_end - linux_start;
  1753      evmm_g0.image_address= linux_start_address;
  1754      evmm_g0.image_offset_in_guest_physical_memory = linux_start_address;
  1755      evmm_g0.physical_memory_size = 0; 
  1756  
  1757      // linux state was prepared by linux_setup
  1758      evmm_g0.cpu_states_array = (uint32_t)guest_processor_state;
  1759  
  1760      // This pointer makes sense only if the devices_count > 0
  1761      evmm_g0.devices_array = 0;
  1762  
  1763      // Startup struct initialization
  1764      p_startup_struct->version_of_this_struct = VMM_STARTUP_STRUCT_VERSION;
  1765      p_startup_struct->size_of_this_struct = sizeof(VMM_STARTUP_STRUCT);
  1766      p_startup_struct->number_of_processors_at_install_time = 1+evmm_num_of_aps;
  1767      p_startup_struct->number_of_processors_at_boot_time = 1+evmm_num_of_aps;
  1768      p_startup_struct->number_of_secondary_guests = 0; 
  1769      p_startup_struct->size_of_vmm_stack = UVMM_DEFAULT_STACK_SIZE_PAGES; 
  1770      p_startup_struct->unsupported_vendor_id = 0; 
  1771      p_startup_struct->unsupported_device_id = 0; 
  1772      p_startup_struct->flags = 0; 
  1773      
  1774      p_startup_struct->default_device_owner= PRIMARY_MAGIC;
  1775      p_startup_struct->acpi_owner= PRIMARY_MAGIC; 
  1776      p_startup_struct->nmi_owner= PRIMARY_MAGIC; 
  1777      p_startup_struct->primary_guest_startup_state = 
  1778                                  (uint64_t)(uint32_t)&evmm_g0;
  1779  
  1780      // excluded regions
  1781      p_startup_struct->num_excluded_regions= 4;
  1782      // remove evmm from ept
  1783      // tmroeder: switched from evmm_image_size to evmm_mem_size
  1784      (p_startup_struct->vmm_memory_layout[0]).image_size = 
  1785                          evmm_mem_size;
  1786      (p_startup_struct->vmm_memory_layout[0]).total_size = 
  1787                          evmm_total_size;
  1788      (p_startup_struct->vmm_memory_layout[0]).base_address = 
  1789                          evmm_start_address;
  1790      (p_startup_struct->vmm_memory_layout[0]).entry_point =  
  1791                          vmm_main_entry_point;
  1792      // remove evmm initial data from ept
  1793      (p_startup_struct->vmm_memory_layout[1]).image_size = 
  1794                          evmm_heap_size;
  1795      (p_startup_struct->vmm_memory_layout[1]).total_size =  
  1796                          evmm_heap_size;
  1797      (p_startup_struct->vmm_memory_layout[1]).base_address =  
  1798                          evmm_heap_base;
  1799      (p_startup_struct->vmm_memory_layout[1]).entry_point =  
  1800                          evmm_heap_base;
  1801      // remove tboot from ept
  1802      (p_startup_struct->vmm_memory_layout[2]).image_size = 
  1803                          tboot_end-tboot_start;
  1804      (p_startup_struct->vmm_memory_layout[2]).total_size =  
  1805                          tboot_end-tboot_start;
  1806      (p_startup_struct->vmm_memory_layout[2]).base_address =  
  1807                          tboot_start;
  1808      (p_startup_struct->vmm_memory_layout[2]).entry_point =  
  1809                          tboot_start;
  1810      // remove bootstrap from ept
  1811      (p_startup_struct->vmm_memory_layout[3]).image_size = 
  1812                          bootstrap_end-bootstrap_start;
  1813      (p_startup_struct->vmm_memory_layout[3]).total_size = 
  1814                          bootstrap_end-bootstrap_start;
  1815      (p_startup_struct->vmm_memory_layout[3]).base_address =  
  1816                          bootstrap_start;
  1817      (p_startup_struct->vmm_memory_layout[3]).entry_point =  
  1818                          bootstrap_start;
  1819  
  1820      // set up evmm e820 table
  1821      p_startup_struct->physical_memory_layout_E820 = evmm_get_e820_table(mbi);
  1822      if (p_startup_struct->physical_memory_layout_E820 == (uint32_t)-1) {
  1823          bprint("Error getting e820 table\n");
  1824          return 1;
  1825      }
  1826  
  1827      // application parameters
  1828      // This structure is not used so the setting is probably OK.
  1829      evmm_a0.size_of_this_struct = sizeof(VMM_APPLICATION_PARAMS_STRUCT); 
  1830      evmm_a0.number_of_params = 0;
  1831      evmm_a0.session_id = 0;
  1832      evmm_a0.address_entry_list = 0;
  1833      evmm_a0.entry_number = 0;
  1834  #if 0
  1835      evmm_a0.fadt_gpa = NULL;
  1836      evmm_a0.dmar_gpa = NULL;
  1837  #endif
  1838  
  1839      return 0;
  1840  }
  1841  
  1842  
  1843  // -------------------------------------------------------------------------
  1844  
  1845  
  1846  // Functions for relocating images
  1847  
  1848  
  1849  elf64_phdr* get_program_load_header(uint32_t image)
  1850  {
  1851      elf64_hdr*  hdr = (elf64_hdr*) image;
  1852      elf64_phdr* prog_header= NULL;
  1853      int         i;
  1854  
  1855  #ifdef JLMDEBUG1
  1856      bprint("get_program_load_header: %d segments, entry size is %d, offset: 0x%08x\n",
  1857              (int)hdr->e_phnum, (uint32_t)hdr->e_phentsize, (uint32_t)hdr->e_phoff);
  1858  #endif
  1859      for(i=0; i<(int)hdr->e_phnum;i++) {
  1860          prog_header= (elf64_phdr*)(image+(uint32_t)hdr->e_phoff+i*((uint32_t)hdr->e_phentsize));
  1861  #ifdef JLMDEBUG1
  1862          bprint("segment entry: %d 0x%08x, offset: 0x%08x\n",
  1863                  (int)prog_header->p_type, (uint32_t)prog_header->p_vaddr,
  1864                  (uint32_t)prog_header->p_offset);
  1865  #endif
  1866          if(prog_header->p_type==ELF64_PT_LOAD) {
  1867              return prog_header;
  1868          }
  1869      }
  1870      return NULL;
  1871  }
  1872  
  1873  
  1874  #define EM_X86_64 62
  1875  uint64_t OriginalEntryAddress(uint32_t base)
  1876  {
  1877      elf64_hdr* elf= (elf64_hdr*) base;
  1878      if(elf->e_machine!=EM_X86_64)
  1879          return 0ULL;
  1880      return elf->e_entry;
  1881  }
  1882  
  1883  
  1884  // get rid of hole from tboot
  1885  // from 0x4005000 to 0xb83f3000
  1886  /*uint32_t size;
  1887          uint32_t base_addr_low;
  1888          uint32_t base_addr_high;
  1889          uint32_t length_low;
  1890          uint32_t length_high;
  1891          uint32_t type;
  1892   */
  1893  void fixe820map()
  1894  {
  1895      unsigned int           i;
  1896      memory_map_t *entry;
  1897  
  1898      for(i=0; i<g_nr_map; i++) {
  1899          entry= &bootstrap_e820[i];
  1900          if(entry->base_addr_low<0xa0000000 && 
  1901             (entry->base_addr_low+entry->size)>0xa0000000 &&
  1902             entry->base_addr_high==0) {
  1903              entry->type= E820_RAM;
  1904              break;
  1905          }
  1906      }
  1907  }
  1908  
  1909  
  1910  // -------------------------------------------------------------------------
  1911  
  1912  
  1913  // main
  1914  //     tboot jumps in here
  1915  int start32_evmm(uint32_t magic, multiboot_info_t* mbi, uint32_t initial_entry) 
  1916  {
  1917      int         i;
  1918      module_t*   m;
  1919  
  1920      // reinitialize screen printing
  1921      bootstrap_partial_reset();
  1922  #ifdef JLMDEBUG
  1923      bprint("start32_evmm, mbi: %08x, initial_entry: %08x, magic: %08x\n",
  1924              (uint32_t)mbi, initial_entry, magic);
  1925  #endif
  1926  
  1927      // print out the registers
  1928      // set mbi to 0x10000
  1929      bprint("shared_page = %p\n", shared_page);
  1930      mbi = (multiboot_info_t*)0x10000;
  1931      // tboot start/end, this is the only data from the shared
  1932      // page we use
  1933  #ifdef TBOOT_SHARED_PAGE
  1934      tboot_start= shared_page->tboot_base;
  1935      tboot_end= shared_page->tboot_base+shared_page->tboot_size;
  1936  #else
  1937      tboot_start= 0;
  1938      tboot_end= 0;
  1939  #endif
  1940  
  1941      // bootstrap's start (load) address and its end address
  1942      bootstrap_start= (uint32_t)&_start_bootstrap;
  1943      bootstrap_end= (uint32_t)&_end_bootstrap;
  1944  
  1945      // We assume the standard with three modules after bootstrap: 
  1946      //    64-bit evmm, the linux image and initram fs.
  1947      // Everything is decompressed EXCEPT the protected mode portion of linux
  1948      int l= mbi->mods_count;
  1949      if (l!=3) {
  1950          bprint("bootstrap error: wrong number of modules %d\n", l);
  1951          LOOP_FOREVER
  1952      }
  1953  
  1954      m= get_module(mbi, 0);
  1955      evmm_start= (uint32_t)m->mod_start;
  1956      evmm_end= (uint32_t)m->mod_end;
  1957      evmm_command_line= (char*)m->string;
  1958  
  1959      m= get_module(mbi, 1);
  1960      linux_start= (uint32_t)m->mod_start;
  1961      linux_end= (uint32_t)m->mod_end;
  1962      linux_command_line= (char*)m->string;
  1963  
  1964      if(l>2) {
  1965          m= get_module(mbi, 2);
  1966          initram_start= (uint32_t)m->mod_start;
  1967          initram_end= (uint32_t)m->mod_end;
  1968      }
  1969  
  1970  #ifdef JLMDEBUG1
  1971      // shared page
  1972      bprint("tboot_start, tboot_end: 0x%08x 0x%08x\n", tboot_start, tboot_end);
  1973      bprint("bootstrap_start, bootstrap_end: 0x%08x 0x%08x, size: %d\n", 
  1974              bootstrap_start, bootstrap_end, bootstrap_end-bootstrap_start);
  1975      bprint("evmm_start, evmm_end: 0x%08x 0x%08x\n", evmm_start, evmm_end);
  1976      if(evmm_command_line==0)
  1977          bprint("evmm command line is NULL\n");
  1978      else
  1979          bprint("evmm command line: %s\n", evmm_command_line);
  1980      bprint("linux_start, linux_end: 0x%08x 0x%08x\n", linux_start, linux_end);
  1981      if(linux_command_line==0)
  1982          bprint("linux command line is NULL\n");
  1983      else
  1984          bprint("linux command line: %s\n", linux_command_line);
  1985      bprint("initram_start, initram_end: 0x%08x 0x%08x\n", initram_start, initram_end);
  1986  #endif
  1987  
  1988      // initialize stack array
  1989      for(i=0;i<MAXPROCESSORS;i++)
  1990          evmm_stack_pointers_array[i]=  0;
  1991  
  1992      // get CPU info
  1993      uint32_t info;
  1994      __asm__ volatile (
  1995          "\tmovl    $1, %%eax\n"
  1996          "\tcpuid\n"
  1997          "\tmovl    %%ebx, %[info]\n"
  1998      : [info] "=m" (info)
  1999      : : "%eax", "%ebx", "%ecx", "%edx");
  2000      // NOTE: changed shift from 16 to 18 to get the right answer
  2001      evmm_num_of_aps = ((info>>18)&0xff)-1;
  2002      if(evmm_num_of_aps < 0)
  2003          evmm_num_of_aps = 0; 
  2004      if(evmm_num_of_aps>=MAXPROCESSORS) {
  2005          bprint("Too many aps (%d), resetting to %d\n", evmm_num_of_aps, MAXPROCESSORS);
  2006          evmm_num_of_aps = MAXPROCESSORS-1; 
  2007      }
  2008  #ifndef MULTIAPS_ENABLED
  2009      evmm_num_of_aps = 0;
  2010  #endif
  2011  
  2012      // get tboot gdtr
  2013      __asm__ volatile (
  2014         "\tsgdt  %[tboot_gdtr_32]\n"
  2015     :[tboot_gdtr_32] "=m" (tboot_gdtr_32)
  2016     ::);
  2017  
  2018      // make a fake TSS to keep vmcs happy
  2019      *((UINT64*)tboot_gdtr_32.base+24)= 0x00808b0000000000ULL;
  2020      *((UINT64*)tboot_gdtr_32.base+32)= 0ULL;
  2021      tboot_gdtr_32.limit= 39;
  2022      tboot_tr_attr= 0x0000808b;
  2023      tboot_tr_limit= 0xffff;
  2024      ia32_write_ts(24);
  2025  
  2026      tboot_cs_selector= ia32_read_cs();
  2027      tboot_ds_selector= ia32_read_ds();
  2028      tboot_ss_selector= ia32_read_ss();
  2029      tboot_tr_selector= ia32_read_ts();
  2030      ia32_read_msr(IA32_MSR_DEBUGCTL, &tboot_msr_debugctl);
  2031      ia32_read_msr(IA32_MSR_EFER, &tboot_msr_efer);
  2032      ia32_read_msr(IA32_MSR_PAT, &tboot_msr_pat);
  2033      ia32_read_msr(IA32_MSR_SYSENTER_ESP, &tboot_msr_sysenter_esp);
  2034      ia32_read_msr(IA32_MSR_SYSENTER_EIP, &tboot_msr_sysenter_eip);
  2035      ia32_read_msr(IA32_MSR_SYSENTER_CS, &tboot_msr_sysenter_cs);
  2036      read_cr0(&tboot_cr0);
  2037      read_cr2(&tboot_cr2);
  2038      read_cr3(&tboot_cr3);
  2039      read_cr4(&tboot_cr4);
  2040  
  2041  #ifdef JLMDEBUG1
  2042      bprint("original gdt\n");
  2043      HexDump((uint8_t*) tboot_gdtr_32.base, 
  2044              (uint8_t*) tboot_gdtr_32.base+tboot_gdtr_32.limit);
  2045  #endif
  2046  
  2047      uint32_t*  p;
  2048      p= (uint32_t*)(tboot_gdtr_32.base+tboot_cs_selector); 
  2049      ia32_get_selector(p, &tboot_cs_base, &tboot_cs_limit, &tboot_cs_attr);
  2050      p= (uint32_t*)(tboot_gdtr_32.base+tboot_ds_selector); 
  2051      ia32_get_selector(p, &tboot_ds_base, &tboot_ds_limit, &tboot_ds_attr);
  2052      p= (uint32_t*)(tboot_gdtr_32.base+tboot_ss_selector); 
  2053      ia32_get_selector(p, &tboot_ss_base, &tboot_ss_limit, &tboot_ss_attr);
  2054      p= (uint32_t*)(tboot_gdtr_32.base+tboot_tr_selector); 
  2055      ia32_get_selector(p, &tboot_tr_base, &tboot_tr_limit, &tboot_tr_attr);
  2056  
  2057  #ifdef JLMDEBUG1
  2058      bprint("gdt base: %08x, limit: %04x\n", tboot_gdtr_32.base, tboot_gdtr_32.limit);
  2059      bprint("tboot cs selector: %04x, base: %08x, limit: %04x, attr: %04x\n",
  2060             tboot_cs_selector, tboot_cs_base, tboot_cs_limit, tboot_cs_attr);
  2061      bprint("tboot ds selector: %04x, base: %08x, limit: %04x, attr: %04x\n",
  2062             tboot_ds_selector, tboot_ds_base, tboot_ds_limit, tboot_ds_attr);
  2063      bprint("tboot ss selector: %04x, base: %08x, limit: %04x, attr: %04x\n",
  2064             tboot_ss_selector, tboot_ss_base, tboot_ss_limit, tboot_ss_attr);
  2065      bprint("tboot tr selector: %04x, base: %08x, limit: %04x, attr: %04x\n",
  2066             tboot_tr_selector, tboot_tr_base, tboot_tr_limit, tboot_tr_attr);
  2067      HexDump((uint8_t*) tboot_gdtr_32.base, 
  2068              (uint8_t*) tboot_gdtr_32.base+tboot_gdtr_32.limit);
  2069      // LOOP_FOREVER
  2070      bprint("msr_debugctl: 0x%016llx\n", tboot_msr_debugctl);
  2071      bprint("msr_efer: 0x%016llx\n", tboot_msr_efer);
  2072      bprint("msr_pat: 0x%016llx\n", tboot_msr_pat);
  2073      bprint("msr_sysenter_esp: 0x%016llx\n", tboot_msr_sysenter_esp);
  2074      bprint("msr_sysenter_eip: 0x%016llx\n", tboot_msr_sysenter_eip);
  2075      bprint("msr_sysenter_cs: 0x%016llx\n", tboot_msr_sysenter_cs);
  2076  #endif
  2077  
  2078      init32.i32_low_memory_page = low_mem;
  2079      init32.i32_num_of_aps = evmm_num_of_aps;
  2080  
  2081      // set up evmm heap addresses and range
  2082      setup_evmm_heap(EVMM_HEAP_BASE, EVMM_HEAP_SIZE);
  2083  
  2084      // Relocate evmm_image 
  2085      evmm_start_address= EVMM_DEFAULT_START_ADDR;
  2086      elf64_phdr* prog_header=  get_program_load_header(evmm_start);
  2087      if(prog_header==NULL) {
  2088          bprint("Cant find load program header\n");
  2089          LOOP_FOREVER
  2090      }
  2091  
  2092      uint32_t evmm_start_load_segment= 0;
  2093      evmm_start_load_segment= evmm_start+((uint32_t)prog_header->p_offset);
  2094      evmm_image_size= (uint32_t) prog_header->p_filesz;
  2095      evmm_orig_mem_size= (uint32_t) prog_header->p_memsz;
  2096      evmm_mem_size= PAGE_UP(evmm_orig_mem_size);  // full page
  2097      evmm_total_size= evmm_mem_size+UVMM_DEFAULT_HEAP;         
  2098  
  2099      if(((uint32_t)(prog_header->p_vaddr))!=evmm_start_address) {
  2100          bprint("evmm load address is not default: 0x%08x, actual: 0x%08x\n",
  2101                  evmm_start_address, evmm_start_load_segment);
  2102          LOOP_FOREVER
  2103      }
  2104  
  2105      vmm_memcpy((void *)evmm_start_address, 
  2106                 (const void*) evmm_start_load_segment,
  2107                 evmm_image_size);
  2108      // zero everything after image
  2109      vmm_memset((void *)(evmm_start_address+evmm_image_size),
  2110                 0, evmm_total_size-evmm_image_size);
  2111  
  2112      // Get entry point
  2113      vmm_main_entry_point =  (uint32_t)OriginalEntryAddress(evmm_start);
  2114      if(vmm_main_entry_point==0) {
  2115          bprint("OriginalEntryAddress: bad elf format\n");
  2116          LOOP_FOREVER
  2117      }
  2118  
  2119  #ifdef JLMDEBUG1
  2120      bprint("\tevmm_heap_base evmm_heap_size: 0x%08x 0x%08x\n", 
  2121              evmm_heap_base, evmm_heap_size);
  2122      bprint("\trelocated evmm_start_address: 0x%08x\nvmm_main_entry_point: 0x%08x\n", 
  2123              evmm_start_address, vmm_main_entry_point);
  2124      bprint("\tprogram header load address: 0x%08x, image size: 0x%08x, total size: 0x%08x\n",
  2125              (uint32_t)(prog_header->p_vaddr), evmm_image_size, evmm_total_size);
  2126  #endif
  2127  
  2128      // setup our e820 table (20 byte format)
  2129      max_e820_entries= E820MAX;   // copied e820 globals
  2130      g_nr_map= 0;
  2131      g_copy_e820_map= bootstrap_e820;
  2132      if(copy_e820_map(mbi)!=true) {
  2133          bprint("cant copy e820 map\n");
  2134          LOOP_FOREVER
  2135      }
  2136  
  2137  #ifdef JLMDEBUG1
  2138      bprint("%d e820 entries after copy, original had %d\n", 
  2139              g_nr_map, mbi->mmap_length/sizeof(memory_map_t));
  2140  #endif
  2141  
  2142      // tboot reserves the region were putting stuff in (see below)
  2143      // Tboot's explaination
  2144      //      Tboot reserves the following regions: 0x20200000 - 0x40004000,
  2145      //      0x40005000 - 0xb88f3000, 0xb9bff000 - 0xb9c00000
  2146      //      because some legacy bios's put USB buffers there
  2147      //      which causes problems if they are DMA protected.
  2148      //      we're going to ignore this because we want to
  2149      //      put bootstrap and evmm here.
  2150      // What BIOS's have this problem?
  2151      // Answer:
  2152      // Both igfx & legacy USB support might reserve small chunks of memory 
  2153      // (0x20000000~0x20200000 is for igfx, 0x40004000~0x40005000 is for usb). 
  2154      // Even in latest Lenovo laptop these memory holes is still existing.
  2155      // Tboot added a min_ram=0xXXXXXXXX option to get the usable memory 
  2156      // back if the size exceeds the number in this option, but the side 
  2157      // effect is the device memory before the reenabled usabled ram will 
  2158      // be DMA protected.
  2159      fixe820map();
  2160  
  2161      // reserve bootstrap
  2162      if(!e820_reserve_ram(bootstrap_start, (bootstrap_end - bootstrap_start))) {
  2163        bprint("Unable to reserve bootstrap region in e820 table\n");
  2164        LOOP_FOREVER
  2165      } 
  2166  
  2167      // JLM(FIX):the start and end should be at page boundries
  2168      // reserve evmm area
  2169      // need to include additional heap too evmm_total_size
  2170      if (!e820_reserve_ram(evmm_heap_base, (evmm_heap_base+evmm_heap_size+evmm_total_size))) {
  2171          bprint("Unable to reserve evmm region in e820 table\n");
  2172          LOOP_FOREVER
  2173      }
  2174  
  2175  #ifdef JLMDEBUG1
  2176      bprint("start of evmm exclusion: 0x%08x, end of exclusion: 0x%08lx\n",
  2177             evmm_heap_base, evmm_heap_base+evmm_heap_size+evmm_total_size);
  2178      bprint("%d e820 entries after new reservations\n", g_nr_map);
  2179      print_map(&g_copy_e820_map[5], 10);
  2180  #endif
  2181  
  2182  #ifdef SETUPIDT
  2183      extern void SetupIDT(); // this is not needed
  2184      SetupIDT();
  2185  #endif
  2186  
  2187      // setup gdt for 64-bit on BSP
  2188      if(setup_64bit()!=0) {
  2189        bprint("Unable to setup 64 bit paging\n");
  2190        LOOP_FOREVER
  2191      }
  2192  
  2193      // Allocate stack and set rsp (esp)
  2194      if(setup_evmm_stacks()!=0) {
  2195        bprint("can't allocate stack\n");
  2196        LOOP_FOREVER
  2197      }
  2198  
  2199  #ifdef JLMDEBUG1
  2200      bprint("evmm_bsp_stack: 0x%08x\n", evmm_bsp_stack);
  2201      bprint("%d processors, stacks:\n", 1+evmm_num_of_aps);
  2202      for(i=0; i<=evmm_num_of_aps; i++) {
  2203          bprint("%08x ", evmm_stack_pointers_array[i]);
  2204      }
  2205      bprint("\n");
  2206  #endif
  2207  
  2208      // this is used by the wakup code in sipi init
  2209      // CHECK(JLM): maybe this should be  &evmm_stack_pointers_array[1]
  2210      init32.i32_esp= evmm_stack_pointers_array;
  2211  
  2212      // We need to allocate this before guest setup
  2213      if(allocate_linux_data()!=0) {
  2214        bprint("Cant allocate data area for primary linux guest\n");
  2215        LOOP_FOREVER
  2216      }
  2217  
  2218      // mark linux data area as reserved
  2219  #ifdef LOWBOOTPARAMS
  2220      if(!e820_reserve_ram(linux_stack_base,
  2221                           evmm_heap_base-linux_stack_base)) {
  2222        bprint("Unable to reserve bootstrap region in e820 table\n");
  2223        LOOP_FOREVER
  2224      }
  2225      if(!e820_reserve_ram(linux_boot_parameters, 2*PAGE_SIZE)) {
  2226        bprint("Unable to reserve bootstrap region in e820 table\n");
  2227        LOOP_FOREVER
  2228      }
  2229  #else
  2230      if(!e820_reserve_ram(linux_boot_parameters, 
  2231                           evmm_heap_base-linux_boot_parameters)) {
  2232        bprint("Unable to reserve bootstrap region in e820 table\n");
  2233        LOOP_FOREVER
  2234      }
  2235  #endif
  2236  
  2237      // prepare linux for evmm
  2238      if(prepare_linux_image_for_evmm(mbi)) {
  2239          bprint("Cant prepare linux image\n");
  2240          LOOP_FOREVER
  2241      }
  2242  
  2243      // copy linux data that is passed to linux in call
  2244      if(prepare_primary_guest_environment(&linux_mbi)!=0) {
  2245          bprint("Error setting up evmm startup arguments\n");
  2246          LOOP_FOREVER
  2247      }
  2248  
  2249  #ifdef JLMDEBUG1
  2250      // Print final parameters for linux
  2251      boot_params_t* new_boot_params= (boot_params_t*)linux_boot_parameters;
  2252      bprint("Final Linux parameters\n");
  2253      bprint("\tShared page address: 0x%016lx %d e820 entries\n", 
  2254             (long unsigned int)*(uint64_t*)new_boot_params->tboot_shared_addr,
  2255             new_boot_params->e820_entries);
  2256      bprint("\tCode32_start: 0x%08x, ramdisk: 0x%08x, ramdisk size: %d\n",
  2257             new_boot_params->hdr.code32_start,
  2258             new_boot_params->hdr.ramdisk_image,
  2259             new_boot_params->hdr.ramdisk_size);
  2260      char* s= (char*) new_boot_params->hdr.cmd_line_ptr;
  2261      if(s!=NULL || vmm_strlen(s)<100) {
  2262          bprint("\tcommand line: %s\n", s);
  2263      }
  2264      else {
  2265          bprint("\tinvalid command line\n");
  2266      }
  2267  #endif
  2268  #ifdef JLMDEBUG1
  2269      bprint("code at evmm start\n");
  2270      HexDump((uint8_t*)evmm_start_address, (uint8_t*)evmm_start_address+10);
  2271      HexDump((uint8_t*)linux_start_address, (uint8_t*)linux_start_address+10);
  2272  #endif
  2273  
  2274      // FIX (JLM):  In evmm, exclude tboot and bootstrap areas from primary space
  2275      // CHECK (JLM):  check that everything is measured (bootstrap, evmm)
  2276  
  2277      // set up evmm stack for vmm_main call and flip to 64 bit mode
  2278      //  vmm_main call:
  2279      //      vmm_main(uint32_t local_apic_id, uint64_t startup_struct_u, 
  2280      //               uint64_t application_params_struct_u, 
  2281      //               uint64_t reserved UNUSED)
  2282  
  2283  #ifdef JLMDEBUG1
  2284      bprint("evmm_image_size: 0x%016lx, evmm_mem_size: 0x%016lx\n",
  2285              (long unsigned int)evmm_image_size, (long unsigned int)evmm_mem_size);
  2286      bprint("Orig extra mem: 0x%016lx, diff: 0x%016lx\n",
  2287              (long unsigned int)evmm_orig_mem_size, 
  2288              (long unsigned int)(evmm_orig_mem_size-evmm_image_size));
  2289      bprint("evmm_total_size: 0x%016lx\n",
  2290              (long unsigned int)evmm_total_size);
  2291      bprint("Evmm descriptor table, cs selector: 0x%08x, cr3: 0x%08x\n", 
  2292             (uint32_t) evmm64_cs_selector, (uint32_t) evmm64_cr3);
  2293      HexDump((uint8_t*)gdtr_64.base, 
  2294              (uint8_t*)gdtr_64.base+gdtr_64.limit);
  2295      bprint("Tboot descriptors, cs_sel: %04x, ds_sel: %04x, ss_sel: %04x, tr_sel: %04x:\n",
  2296              tboot_cs_selector,tboot_ds_selector,
  2297              tboot_ss_selector, tboot_tr_selector);
  2298      bprint("Tboot attrs, cs_attr: %08x, ds_attr: %08x, ss_attr: %08x, tr_attr: %08x:\n",
  2299              tboot_cs_attr,tboot_ds_attr,
  2300              tboot_ss_attr, tboot_tr_attr);
  2301      bprint("Tboot limits, cs_limit: %04x, ds_limit: %04x, ss_limit: %04x, tr_limit: %04x:\n",
  2302              tboot_cs_limit,tboot_ds_limit, tboot_ss_limit, tboot_tr_limit);
  2303      HexDump((uint8_t*)(tboot_gdtr_32.base), 
  2304              (uint8_t*)(tboot_gdtr_32.base+tboot_gdtr_32.limit));
  2305      bprint("cr0: %08x, cr2: %08x, cr3: %08x, cr4: %08x\n", guest_cr0, 
  2306             guest_cr2, guest_cr3, guest_cr4);
  2307      bprint("Guest descriptor table, cs_sel: %04x, ds_sel: %04x, ss_sel: %04x, tr_sel: %04x:\n",
  2308              guest_cs_selector, guest_ds_selector,
  2309              guest_ss_selector, guest_tr_selector);
  2310      HexDump((uint8_t*)(guest_gdtr.base), 
  2311              (uint8_t*)(guest_gdtr.base+guest_gdtr.limit));
  2312      bprint("arguments to vmm_main: ");
  2313      bprint("apic %d, p_startup_struct, 0x%08x\n",
  2314         (int) local_apic_id, (int) p_startup_struct);
  2315      bprint("linux stack base: %08x, linux_edi_reg: %08x\n",
  2316             linux_stack_base, linux_edi_register);
  2317      bprint("\tapplication struct 0x%08x, reserved, 0x%08x\n",
  2318             (int)evmm_p_a0, (int)evmm_reserved);
  2319  #endif
  2320  #ifdef JLMDEBUG1
  2321      boot_params_t* linux_boot_params= (boot_params_t*)linux_boot_parameters;
  2322      bprint("bootparams at: %p, e820 map in boot params to linux (%d entries)\n",
  2323            linux_boot_params, linux_boot_params->e820_entries);
  2324      bprint("command line: %s\n", 
  2325             (char*) (linux_boot_parameters+sizeof(boot_params_t)));
  2326      e820entry_t* pent= linux_boot_params->e820_map;
  2327      for(i=0; i<linux_boot_params->e820_entries;i++) {
  2328          bprint("\taddr: %016llx, size: %lld, type: %d\n",
  2329                  pent->addr, pent->size, pent->type);
  2330          pent++;
  2331      }
  2332  #endif
  2333  
  2334      if(evmm64_cs_selector!=64) {
  2335          bprint("cs_selector is wrong, %0x\n", evmm64_cs_selector);
  2336          LOOP_FOREVER
  2337      }
  2338  
  2339      if (evmm_num_of_aps > 0) {
  2340  #ifdef JLMDEBUG
  2341          bprint("about to call startap_main, %d aps\n", evmm_num_of_aps);
  2342  #endif
  2343          startap_main(&init32, &init64, p_startup_struct, vmm_main_entry_point);
  2344      }
  2345  
  2346      __asm__ volatile (
  2347  
  2348          "\tcli\n"
  2349  
  2350          // move entry point to ebx for jump
  2351          "\tmovl %[vmm_main_entry_point], %%ebx\n"
  2352  
  2353          // initialize CR3 with PML4 base
  2354          "\tmovl %[evmm64_cr3], %%eax\n"
  2355          "\tmovl %%eax, %%cr3 \n"
  2356  
  2357          // evmm_bsp_stack points to the start of the stack
  2358          "\tmovl   %[evmm_bsp_stack], %%esp\n"
  2359          "\tandl  $0xfffffff8, %%esp\n"
  2360  
  2361          // prepare arguments for 64-bit mode
  2362          // there are 4 arguments (including reserved)
  2363          "\txor  %%eax, %%eax\n"
  2364          "\tpush %%eax\n"
  2365          "\tpush %[evmm_reserved]\n"
  2366          "\tpush %%eax\n"
  2367          "\tpush %[evmm_p_a0]\n"
  2368          "\tpush %%eax\n"
  2369          "\tpush %[p_startup_struct]\n"
  2370          "\tpush %%eax\n"
  2371          "\tpush %[local_apic_id]\n"
  2372  
  2373          // enable 64-bit mode
  2374          // EFER MSR register
  2375          "\tmovl $0x0c0000080, %%ecx\n"
  2376          // read EFER into EAX
  2377          "\trdmsr\n"
  2378  
  2379          // set EFER.LME=1
  2380          "\tbts $8, %%eax\n"
  2381          // write EFER
  2382          "\twrmsr\n"
  2383  
  2384          // enable paging CR0.PG=1
  2385          "\tmovl %%cr0, %%eax\n"
  2386          "\tbts  $31, %%eax\n"
  2387          "\tmovl %%eax, %%cr0\n"
  2388  
  2389          // at this point we are in 32-bit compatibility mode
  2390          // LMA=1, CS.L=0, CS.D=1
  2391          // jump from 32bit compatibility mode into 64bit mode.
  2392  
  2393          // mode switch
  2394          "ljmp   $64, $1f\n"
  2395  
  2396  "1:\n"
  2397          // in 64 bit this is actually pop rdi (local apic)
  2398          "\tpop %%edi\n"
  2399          // in 64 bit this is actually pop rsi (startup struct)
  2400          "\tpop %%esi\n"
  2401          // in 64 bit this is actually pop rdx (application struct)
  2402          "\tpop %%edx\n"
  2403          // in 64 bit this is actually pop rcx (reserved)
  2404          "\tpop %%ecx\n"
  2405  
  2406          "\tjmp *%%ebx\n"
  2407          "\tud2\n"
  2408      :: [vmm_main_entry_point] "m" (vmm_main_entry_point), 
  2409         [evmm_bsp_stack] "m" (evmm_bsp_stack), [evmm64_cr3] "m" (evmm64_cr3),
  2410         [evmm64_cs_selector] "m" (evmm64_cs_selector), 
  2411         [evmm_reserved] "m" (evmm_reserved), 
  2412         [evmm_p_a0] "m" (evmm_p_a0), [p_startup_struct] "m" (p_startup_struct),
  2413         [local_apic_id] "m" (local_apic_id)
  2414      :);
  2415  
  2416      return 0;
  2417  }
  2418