github.com/jlmucb/cloudproxy@v0.0.0-20170830161738-b5aa0b619bc4/cpvmm/vmm/bootstrap/bootstrap_entry.c (about) 1 /* 2 * File: bootstrap_entry.c 3 * Description: Get tbooted and boot 64 bit evmm 4 * Author: John Manferdelli 5 * 6 * Copyright (c) 2013 Intel Corporation 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 20 #include "bootstrap_types.h" 21 #include "bootstrap_string.h" 22 #include "bootstrap_print.h" 23 #include "bootstrap_ia.h" 24 25 #include "multiboot.h" 26 #include "e820.h" 27 #include "elf64.h" 28 #include "linux_defns.h" 29 #include "elf_defns.h" 30 #include "tboot.h" 31 32 #include "vmm_defs.h" 33 #include "em64t_defs.h" 34 #include "ia32_defs.h" 35 #include "ia32_low_level.h" 36 #include "x32_init64.h" 37 #include "vmm_startup.h" 38 39 40 // this is all 32 bit code 41 42 #define JLMDEBUG 43 44 #define MULTIAPS_ENABLED 45 46 // JLM: Remove this soon 47 #ifdef TBOOT_SHARED_PAGE 48 tboot_shared_t *shared_page = (tboot_shared_t *)0x829000; 49 #else 50 tboot_shared_t *shared_page = (tboot_shared_t *)NULL; 51 #endif 52 53 54 #define PSE_BIT 0x10 55 #define PAE_BIT 0x20 56 #define PAGE_SIZE (1024 * 4) 57 #define PAGE_MASK (~(PAGE_SIZE-1)) 58 #define PAGE_UP(a) ((a+(PAGE_SIZE-1))&PAGE_MASK) 59 #define MAXPROCESSORS 64 60 61 62 #define LOWBOOTPARAMS 63 64 65 // ------------------------------------------------------------------------- 66 67 68 // These are the variables for machine setup 69 // Consult evmm-init-notes.txt 70 71 // start and end of bootstrap, no header, start is load address 72 extern uint32_t _start_bootstrap, _end_bootstrap; 73 74 75 #define EVMM_DEFAULT_START_ADDR 0x70000000 76 #define LINUX_DEFAULT_LOAD_ADDRESS 0x100000 77 #define EVMM_HEAP_SIZE 0x100000 78 #define EVMM_HEAP_BASE (EVMM_DEFAULT_START_ADDR- EVMM_HEAP_SIZE) 79 #define UVMM_DEFAULT_HEAP 0x5000000 80 81 82 // Memory layout on start32_evmm entry 83 uint32_t tboot_start= 0; // tboot image start address 84 uint32_t tboot_end= 0; // tboot image end address 85 uint32_t bootstrap_start= 0; // bootstrap image start address 86 uint32_t bootstrap_end= 0; // bootstrap image end address 87 uint32_t evmm_start= 0; // location of evmm start 88 uint32_t evmm_end= 0; // location of evmm image start 89 uint32_t linux_start= 0; // location of linux imag start 90 uint32_t linux_end= 0; // location of evmm start 91 uint32_t initram_start= 0; // location of initram image start 92 uint32_t initram_end= 0; // location of initram image end 93 94 95 // Post relocation addresses 96 uint32_t evmm_start_address= 0; // address of evmm after relocation 97 uint32_t evmm_image_size= 0; // image size 98 uint32_t evmm_orig_mem_size= 0; // original memsize 99 uint32_t evmm_mem_size= 0; // memory size (rounded up) 100 uint32_t evmm_total_size= 0; // total size (includes heap) 101 uint32_t vmm_main_entry_point= 0; // address of vmm_main after relocation 102 uint32_t evmm_heap_base= 0; // start of initial evmm heap 103 uint32_t evmm_heap_current= 0; 104 uint32_t evmm_heap_top= 0; 105 uint32_t evmm_heap_size= 0; // size of initial evmm heap 106 uint32_t evmm_bsp_stack_base= 0; // low address where stack is allocated 107 uint32_t evmm_bsp_stack= 0; // initial bsp evmm stack 108 uint32_t evmm_stack_pointers_array[MAXPROCESSORS]; 109 char* evmm_command_line= NULL; // evmm command line 110 111 multiboot_info_t linux_mbi; // mbi for linux 112 113 extern unsigned int max_e820_entries; // copied e820 globals 114 extern unsigned int g_nr_map; // copied e820 globals 115 extern memory_map_t *g_copy_e820_map; // copied e820 globals 116 memory_map_t bootstrap_e820[E820MAX]; // our e820 map 117 118 // linux guest 119 uint32_t linux_real_mode_start= 0; // address of real mode 120 uint32_t linux_real_mode_size= 0; // size of real mode size 121 uint32_t linux_protected_mode_start= 0; // address of protected mode 122 uint32_t linux_protected_mode_size= 0; // size of protected mode 123 uint32_t linux_start_address= 0; // start address of image 124 uint32_t initram_start_address= 0; // address of the initram 125 uint32_t linux_entry_address= 0; // address of the eip guest entry 126 uint32_t linux_edi_register= 0; // edi register on guest entry 127 uint32_t linux_esp_register= 0; // esp on guest entry 128 uint32_t linux_stack_base= 0; // base of the stack on entry 129 uint32_t linux_stack_size= 0; // stack size on guest entry 130 char* linux_command_line= NULL; // old command line 131 char* new_cmdline= NULL; // new command line 132 133 // boot parameters for linux guest 134 uint32_t linux_original_boot_parameters= 0; 135 uint32_t linux_boot_parameters= 0; 136 137 138 // ------------------------------------------------------------------------- 139 140 141 // initial evmm heap implementation 142 143 144 void setup_evmm_heap(uint32_t heap_base_address, uint32_t heap_bytes) 145 { 146 evmm_heap_current = evmm_heap_base = heap_base_address; 147 evmm_heap_top = evmm_heap_base + heap_bytes; 148 evmm_heap_size= heap_bytes; 149 } 150 151 152 void *evmm_page_alloc(uint32_t pages) 153 { 154 uint32_t address; 155 uint32_t size = pages * PAGE_SIZE; 156 157 address = ALIGN_FORWARD(evmm_heap_current, PAGE_SIZE); 158 evmm_heap_current = address + size; 159 vmm_memset((void*)address, 0, size); 160 return (void*)address; 161 } 162 163 164 // ------------------------------------------------------------------------- 165 166 167 // Machine state and mode transition data structures 168 #define TOTAL_MEM 0x100000000ULL 169 170 static uint32_t evmm64_cs_selector= 0; 171 static uint32_t evmm64_ds_selector= 0; 172 173 static uint32_t evmm64_cr4= 0; 174 static uint32_t evmm64_cr3 = 0; 175 176 static IA32_GDTR gdtr_32; 177 static IA32_GDTR gdtr_64; // still in 32-bit mode 178 179 // location of page in evmm heap that holds 64 bit descriptor table 180 static uint32_t evmm_descriptor_table= 0; 181 static EM64T_PML4 * pml4_table= NULL; 182 static EM64T_PDPE * pdp_table= NULL; 183 static EM64T_PDE_2MB * pd_table= NULL; 184 185 static INIT64_STRUCT init64; 186 static INIT32_STRUCT init32; 187 188 uint32_t low_mem = 0x8000; 189 int evmm_num_of_aps= 0; 190 static uint64_t evmm_reserved = 0; 191 static uint32_t local_apic_id = 0; 192 193 static IA32_GDTR tboot_gdtr_32; 194 uint16_t tboot_cs_selector= 0; 195 static uint32_t tboot_cs_base= 0; 196 static uint16_t tboot_cs_limit= 0; 197 static uint16_t tboot_cs_attr= 0; 198 uint16_t tboot_ds_selector= 0; 199 static uint32_t tboot_ds_base= 0; 200 static uint16_t tboot_ds_limit= 0; 201 static uint16_t tboot_ds_attr= 0; 202 uint16_t tboot_ss_selector= 0; 203 static uint32_t tboot_ss_base= 0; 204 static uint16_t tboot_ss_limit= 0; 205 static uint16_t tboot_ss_attr= 0; 206 static uint64_t tboot_msr_debugctl= 0; 207 static uint64_t tboot_msr_efer= 0; 208 static uint64_t tboot_msr_pat= 0; 209 static uint64_t tboot_msr_sysenter_cs= 0; 210 static uint64_t tboot_msr_sysenter_esp= 0; 211 static uint64_t tboot_msr_sysenter_eip= 0; 212 static uint16_t tboot_tr_selector= 0; 213 static uint32_t tboot_tr_base= 0; 214 static uint16_t tboot_tr_limit= 0; 215 static uint16_t tboot_tr_attr= 0; 216 217 static uint32_t tboot_cr0= 0; 218 static uint32_t tboot_cr2= 0; 219 static uint32_t tboot_cr3= 0; 220 static uint32_t tboot_cr4= 0; 221 222 // guest gdt 223 static IA32_GDTR __attribute__((aligned (16))) guest_gdtr; 224 uint64_t __attribute__((aligned (16))) guest_gdt[16] = { 225 0x0000000000000000, 226 0x0000000000000000, // NULL descriptor 227 0x00c09b000000ffff, // __KERNEL_CS 228 0x00c093000000ffff, // __KERNEL_DS 229 0x00808b000000ffff, // TS descriptor 230 0x0000000000000000, // TS continued 231 0x0000000000000000, 232 0x0000000000000000, 233 0x0000000000000000, 234 0x0000000000000000, 235 0x0000000000000000, 236 0x0000000000000000, 237 0x0000000000000000, 238 0x0000000000000000, 239 0x0000000000000000, 240 0x0000000000000000, 241 }; 242 static uint16_t guest_cs_selector= 0; 243 static uint32_t guest_cs_base= 0; 244 static uint32_t guest_cs_limit= 0; 245 static uint16_t guest_cs_attr= 0; 246 static uint16_t guest_ds_selector= 0; 247 static uint32_t guest_ds_base= 0; 248 static uint32_t guest_ds_limit= 0; 249 static uint16_t guest_ds_attr= 0; 250 static uint16_t guest_ss_selector= 0; 251 static uint32_t guest_ss_base= 0; 252 static uint32_t guest_ss_limit= 0; 253 static uint16_t guest_ss_attr= 0; 254 static uint64_t guest_msr_debugctl= 0; 255 static uint64_t guest_msr_efer= 0; 256 static uint64_t guest_msr_pat= 0; 257 static uint64_t guest_msr_sysenter_cs= 0; 258 static uint64_t guest_msr_sysenter_esp= 0; 259 static uint64_t guest_msr_sysenter_eip= 0; 260 static uint16_t guest_tr_selector= 0; 261 static uint32_t guest_tr_base= 0; 262 static uint32_t guest_tr_limit= 0; 263 static uint16_t guest_tr_attr= 0; 264 265 static uint32_t guest_cr0= 0; 266 static uint32_t guest_cr2= 0; 267 static uint32_t guest_cr3= 0; 268 static uint32_t guest_cr4= 0; 269 270 271 // ------------------------------------------------------------------------- 272 273 274 // linux guest initialization definitions and globals 275 276 typedef struct VMM_INPUT_PARAMS_S { 277 uint64_t local_apic_id; 278 uint64_t startup_struct; 279 uint64_t guest_params_struct; 280 } VMM_INPUT_PARAMS; 281 282 VMM_GUEST_STARTUP evmm_g0; 283 VMM_APPLICATION_PARAMS_STRUCT evmm_a0; 284 VMM_APPLICATION_PARAMS_STRUCT* evmm_p_a0= &evmm_a0; 285 286 // state of primary linux guest on startup 287 VMM_GUEST_CPU_STARTUP_STATE guest_processor_state[MAXPROCESSORS]; 288 VMM_STARTUP_STRUCT startup_struct; 289 VMM_STARTUP_STRUCT * p_startup_struct = &startup_struct; 290 291 #define PRIMARY_MAGIC 0x0 292 293 // expanded e820 table used by evmm 294 static unsigned int evmm_num_e820_entries = 0; 295 INT15_E820_MEMORY_MAP* evmm_e820= NULL; 296 uint64_t evmm_start_of_e820_table= 0ULL; // 64 bits version 297 298 static const cmdline_option_t linux_cmdline_options[] = { 299 { "vga", "" }, 300 { "mem", "" }, 301 { NULL, NULL } 302 }; 303 static char linux_param_values[ARRAY_SIZE(linux_cmdline_options)][MAX_VALUE_LEN]; 304 305 306 // ------------------------------------------------------------------------- 307 308 309 // machine setup and paging 310 311 312 int ia32_get_selector(uint32_t* p, uint32_t* base, uint16_t* limit, uint16_t* access) 313 { 314 IA32_SEGMENT_DESCRIPTOR* desc= (IA32_SEGMENT_DESCRIPTOR*)p; 315 IA32_SEGMENT_DESCRIPTOR_ATTR attr; 316 317 *base= (UINT32)((desc->gen.lo.base_address_15_00) | 318 (desc->gen.hi.base_address_23_16 << 16) | 319 (desc->gen.hi.base_address_31_24 << 24)); 320 *limit= desc->gen.lo.limit_15_00; 321 if(desc->gen.hi.granularity) 322 *limit = (*limit << 12) | 0x00000fff; 323 attr.attr16 = desc->gen_attr.attributes; 324 attr.bits.limit_19_16 = 0; 325 *access= (uint32_t) attr.attr16; 326 return 0; 327 } 328 329 330 void ia32_write_ts(uint16_t ts) 331 { 332 __asm__ volatile( 333 "\tmovw %[ts],%%ax\n" 334 "\tltr %%ax\n" 335 ::[ts] "g" (ts) 336 : "%ax"); 337 } 338 339 340 uint16_t ia32_read_ts() 341 { 342 uint16_t ts= 0; 343 344 __asm__ volatile( 345 "\txorw %%ax,%%ax\n" 346 "\tstr %%ax\n" 347 "\tmovw %%ax, %[ts]\n" 348 :[ts] "=g" (ts) 349 : : "%ax"); 350 return ts; 351 } 352 353 354 uint16_t ia32_read_cs() 355 { 356 uint16_t cs= 0; 357 358 __asm__ volatile( 359 "\txorw %%ax,%%ax\n" 360 "\tmovw %%cs, %%ax\n" 361 "\tmovw %%ax, %[cs]\n" 362 :[cs] "=g" (cs) 363 : : "%ax"); 364 return cs; 365 } 366 367 368 uint16_t ia32_read_ds() 369 { 370 uint16_t ds= 0; 371 372 __asm__ volatile( 373 "\txorw %%ax,%%ax\n" 374 "\tmovw %%ds,%%ax\n" 375 "\tmovw %%ax, %[ds]\n" 376 :[ds] "=g" (ds) 377 : : "%ax"); 378 return ds; 379 } 380 381 382 uint16_t ia32_read_es() 383 { 384 uint16_t es= 0; 385 386 __asm__ volatile( 387 "\txorw %%ax,%%ax\n" 388 "\tmovw %%es,%%ax\n" 389 "\tmovw %%ax, %[es]\n" 390 :[es] "=g" (es) 391 : : "%ax"); 392 return es; 393 } 394 395 396 uint16_t ia32_read_fs() 397 { 398 uint16_t fs= 0; 399 400 __asm__ volatile( 401 "\txorw %%ax,%%ax\n" 402 "\tmovw %%fs,%%ax\n" 403 "\tmovw %%ax, %[fs]\n" 404 :[fs] "=g" (fs) 405 : : "%ax"); 406 return fs; 407 } 408 409 410 uint16_t ia32_read_gs() 411 { 412 uint16_t gs= 0; 413 414 __asm__ volatile( 415 "\txorw %%ax,%%ax\n" 416 "\tmovw %%gs,%%ax\n" 417 "\tmovw %%ax, %[gs]\n" 418 :[gs] "=g" (gs) 419 : : "%ax"); 420 return gs; 421 } 422 423 424 uint16_t ia32_read_ss() 425 { 426 uint16_t ss= 0; 427 428 __asm__ volatile( 429 "\txorw %%ax,%%ax\n" 430 "\tmovw %%ss,%%ax\n" 431 "\tmovw %%ax, %[ss]\n" 432 :[ss] "=g" (ss) 433 : : "%ax"); 434 return ss; 435 } 436 437 438 void read_cr0(uint32_t* ret) 439 { 440 __asm__ volatile( 441 "\tmovl %[ret],%%ebx\n" 442 "\tmovl %%cr0,%%eax\n" 443 "\tmovl %%eax, (%%ebx)\n" 444 ::[ret] "p" (ret) 445 : "%eax","%ebx"); 446 } 447 448 449 void read_cr2(uint32_t* ret) 450 { 451 __asm__ volatile( 452 "\tmovl %[ret],%%ebx\n" 453 "\tmovl %%cr2,%%eax\n" 454 "\tmovl %%eax, (%%ebx)\n" 455 ::[ret] "p" (ret) 456 : "%eax","%ebx"); 457 } 458 459 460 void read_cr3(uint32_t* ret) 461 { 462 __asm__ volatile( 463 "\tmovl %[ret],%%ebx\n" 464 "\tmovl %%cr3,%%eax\n" 465 "\tmovl %%eax, (%%ebx)\n" 466 ::[ret] "p" (ret) 467 : "%eax","%ebx"); 468 } 469 470 471 void write_cr3(uint32_t value) 472 { 473 __asm__ volatile( 474 "\tmovl %[value], %%eax\n" 475 "\t movl %%eax, %%cr3\n" 476 ::[value] "m" (value) 477 : "%eax", "cc"); 478 } 479 480 481 void read_cr4(uint32_t* ret) 482 { 483 __asm__ volatile( 484 "\tmovl %[ret],%%ebx\n" 485 "\tmovl %%cr4,%%eax\n" 486 "\tmovl %%eax, (%%ebx)\n" 487 ::[ret] "r" (ret) 488 : "%eax","%ebx"); 489 } 490 491 492 void write_cr4(uint32_t val) 493 { 494 __asm__ volatile( 495 "\tmovl %[val],%%eax\n" 496 "\tmovl %%eax, %%cr4\n" 497 ::[val] "p" (val) 498 : "%eax","%ebx"); 499 } 500 501 502 void ia32_read_msr(uint32_t msr_id, uint64_t *p_value) 503 { 504 __asm__ volatile( 505 "\tmovl %[msr_id], %%ecx\n" 506 "\trdmsr\n" //write from EDX:EAX into MSR[ECX] 507 "\tmovl %[p_value], %%ecx\n" 508 "\tmovl %%eax, (%%ecx)\n" 509 "\tmovl %%edx, 4(%%ecx)\n" 510 ::[msr_id] "g" (msr_id), [p_value] "p" (p_value) 511 :"%eax", "%ecx", "%edx"); 512 } 513 514 515 void ia32_write_msr(uint32_t msr_id, uint64_t *p_value) 516 { 517 __asm__ volatile( 518 "\tmovl %[p_value], %%ecx\n" 519 "\tmovl (%%ecx), %%eax\n" 520 "\tmovl 4(%%ecx), %%edx\n" 521 "\tmovl %[msr_id], %%ecx\n" 522 "\twrmsr" //write from EDX:EAX into MSR[ECX] 523 ::[msr_id] "g" (msr_id), [p_value] "p" (p_value) 524 :"%eax", "%ecx", "%edx"); 525 } 526 527 528 int setup_evmm_stacks() 529 { 530 // bsp stack 531 evmm_bsp_stack_base= (uint32_t) 532 evmm_page_alloc(UVMM_DEFAULT_STACK_SIZE_PAGES); 533 if(evmm_bsp_stack_base==0) { 534 return 1; 535 } 536 vmm_memset((void*)evmm_bsp_stack_base, 0, 537 PAGE_4KB_SIZE*UVMM_DEFAULT_STACK_SIZE_PAGES); 538 // stack grows down 539 evmm_bsp_stack= evmm_bsp_stack_base+ 540 PAGE_4KB_SIZE*UVMM_DEFAULT_STACK_SIZE_PAGES; 541 evmm_stack_pointers_array[0]= evmm_bsp_stack; 542 543 // ap stacks 544 uint32_t evmm_ap_stack_base= 0; 545 uint32_t evmm_ap_stack= 0; 546 int i; 547 for(i=0; i<evmm_num_of_aps;i++) { 548 evmm_ap_stack_base= (uint32_t) 549 evmm_page_alloc(UVMM_DEFAULT_STACK_SIZE_PAGES); 550 vmm_memset((void*)evmm_ap_stack_base, 0, 551 PAGE_4KB_SIZE*UVMM_DEFAULT_STACK_SIZE_PAGES); 552 if(evmm_ap_stack_base==0) { 553 return 1; 554 } 555 // stack grows down 556 evmm_ap_stack= evmm_ap_stack_base+ 557 PAGE_4KB_SIZE*UVMM_DEFAULT_STACK_SIZE_PAGES; 558 evmm_stack_pointers_array[i+1]= evmm_ap_stack; 559 } 560 return 0; 561 } 562 563 564 void setup_64bit_descriptors(void) 565 { 566 // 1 page for segment descriptors 567 evmm_descriptor_table = (uint32_t) evmm_page_alloc(1); 568 569 // zero gdt 570 vmm_memset((void*)evmm_descriptor_table, 0, PAGE_4KB_SIZE); 571 572 // read 32-bit GDTR 573 __asm__ volatile ( 574 "\tsgdt %[gdtr_32]\n" 575 :[gdtr_32] "=m" (gdtr_32) 576 ::); 577 578 // copy it to the new 64-bit GDT 579 vmm_memcpy((void*)evmm_descriptor_table, (void *) gdtr_32.base, gdtr_32.limit+1); 580 581 uint32_t descriptor_base= ((uint32_t)evmm_descriptor_table+gdtr_32.limit+1); 582 583 // 16 byte aligned 584 descriptor_base= (descriptor_base+15)&(~0xf); 585 586 uint64_t* end_of_desciptor_table= (uint64_t*) descriptor_base; 587 // cs descriptor 588 end_of_desciptor_table[0]= 0x00a09a0000000000ULL; 589 end_of_desciptor_table[1]= 0x0000000000000000ULL; 590 // ds descriptor 591 end_of_desciptor_table[2]= 0x00a0920000000000ULL; 592 end_of_desciptor_table[3]= 0x0000000000000000ULL; 593 594 // selectors 595 evmm64_cs_selector = (uint32_t)(&end_of_desciptor_table[0]) - 596 (uint32_t)evmm_descriptor_table; 597 evmm64_ds_selector = (uint32_t)(&end_of_desciptor_table[2]) - 598 (uint32_t)evmm_descriptor_table; 599 600 // set 64 bit 601 gdtr_64.base= (uint32_t) evmm_descriptor_table; 602 gdtr_64.limit= gdtr_32.limit + (uint32_t)(&end_of_desciptor_table[4])- 603 (uint32_t)(&end_of_desciptor_table[0]); 604 605 #ifdef JLMDEBUG1 606 bprint("\nevmm64_cs_selector: %d, evmm64_ds_selector: %d\n", 607 evmm64_cs_selector, evmm64_ds_selector); 608 bprint("\ngdtr_32, base: 0x%08x, limit: %d\n", 609 gdtr_32.base, gdtr_32.limit); 610 HexDump((uint8_t*)gdtr_32.base, (uint8_t*)gdtr_32.base+gdtr_32.limit+1); 611 bprint("\ngdtr_64, base: 0x%08x, limit: %d\n", 612 gdtr_64.base, gdtr_64.limit); 613 HexDump((uint8_t*)gdtr_64.base, (uint8_t*)gdtr_64.base+gdtr_64.limit+1); 614 // LOOP_FOREVER 615 #endif 616 // load gdtr 617 // ia32_write_gdtr(&gdtr_64); 618 __asm__ volatile ( 619 "\tlgdt %[gdtr_64]\n" 620 :[gdtr_64] "=m" (gdtr_64) 621 ::); 622 } 623 624 625 // 2MB pages while running in 32-bit mode. 626 // It should scope full 32-bit space, i.e. 4G 627 void setup_64bit_paging(uint64_t memory_size) 628 { 629 uint32_t pdpt_entry_id; 630 uint32_t pdt_entry_id; 631 uint32_t address = 0; 632 633 if (memory_size > TOTAL_MEM) 634 memory_size = TOTAL_MEM; 635 636 // To cover 4G-byte addrerss space the minimum set is 637 // PML4 - 1entry 638 // PDPT - 4 entries 639 // PDT - 2048 entries 640 pml4_table = (EM64T_PML4 *) evmm_page_alloc(1); 641 vmm_memset(pml4_table, 0, PAGE_4KB_SIZE); 642 643 pdp_table = (EM64T_PDPE *) evmm_page_alloc(1); 644 vmm_memset(pdp_table, 0, PAGE_4KB_SIZE); 645 646 // only one entry is enough in PML4 table 647 pml4_table[0].lo.base_address_lo = (uint32_t) pdp_table >> 12; 648 pml4_table[0].lo.present = 1; 649 pml4_table[0].lo.rw = 1; 650 pml4_table[0].lo.us = 0; 651 pml4_table[0].lo.pwt = 0; 652 pml4_table[0].lo.pcd = 0; 653 pml4_table[0].lo.accessed = 0; 654 pml4_table[0].lo.ignored = 0; 655 pml4_table[0].lo.zeroes = 0; 656 pml4_table[0].lo.avl = 0; 657 658 // 4 entries is enough in PDPT 659 for (pdpt_entry_id = 0; pdpt_entry_id < 4; ++pdpt_entry_id) { 660 pdp_table[pdpt_entry_id].lo.present = 1; 661 pdp_table[pdpt_entry_id].lo.rw = 1; 662 pdp_table[pdpt_entry_id].lo.us = 0; 663 pdp_table[pdpt_entry_id].lo.pwt = 0; 664 pdp_table[pdpt_entry_id].lo.pcd = 0; 665 pdp_table[pdpt_entry_id].lo.accessed = 0; 666 pdp_table[pdpt_entry_id].lo.ignored = 0; 667 pdp_table[pdpt_entry_id].lo.zeroes = 0; 668 pdp_table[pdpt_entry_id].lo.avl = 0; 669 670 pd_table = (EM64T_PDE_2MB *) evmm_page_alloc(1); 671 vmm_memset(pd_table, 0, PAGE_4KB_SIZE); 672 pdp_table[pdpt_entry_id].lo.base_address_lo = (uint32_t) pd_table >> 12; 673 674 for (pdt_entry_id = 0; pdt_entry_id < 512; 675 ++pdt_entry_id, address += PAGE_2MB_SIZE) { 676 pd_table[pdt_entry_id].lo.present = 1; 677 pd_table[pdt_entry_id].lo.rw = 1; 678 pd_table[pdt_entry_id].lo.us = 0; 679 pd_table[pdt_entry_id].lo.pwt = 0; 680 pd_table[pdt_entry_id].lo.pcd = 0; 681 pd_table[pdt_entry_id].lo.accessed = 0; 682 pd_table[pdt_entry_id].lo.dirty = 0; 683 pd_table[pdt_entry_id].lo.pse = 1; 684 pd_table[pdt_entry_id].lo.global = 0; 685 pd_table[pdt_entry_id].lo.avl = 0; 686 pd_table[pdt_entry_id].lo.pat = 0; //???? 687 pd_table[pdt_entry_id].lo.zeroes = 0; 688 pd_table[pdt_entry_id].lo.base_address_lo = address >> 21; 689 } 690 } 691 // FIX(JLM): add flags here to set caching 692 evmm64_cr3= (((uint32_t) pml4_table) & 0xfffff000); 693 } 694 695 696 int setup_64bit() 697 { 698 // setup_64 bit for 64-bit on BSP 699 setup_64bit_descriptors(); 700 701 // setup paging, control registers and flags on BSP 702 setup_64bit_paging(TOTAL_MEM); 703 704 // set cr3 and cr4 705 write_cr3(evmm64_cr3); 706 read_cr4(&evmm64_cr4); 707 evmm64_cr4|= PAE_BIT|PSE_BIT; 708 write_cr4(evmm64_cr4); 709 710 // we don't really use this structure 711 init64.i64_gdtr= gdtr_64; 712 init64.i64_cr3= evmm64_cr3; // note we dont use the structure 713 init64.i64_cs = evmm64_cs_selector; 714 init64.i64_efer = 0; 715 return 0; 716 } 717 718 719 // ------------------------------------------------------------------------- 720 721 722 extern void startap_main(INIT32_STRUCT *p_init32, INIT64_STRUCT *p_init64, 723 VMM_STARTUP_STRUCT *p_startup, uint32_t entry_point); 724 725 726 void start_64bit_mode_on_aps(uint32_t stack_pointer, uint32_t start_address, 727 uint32_t segment, uint32_t cpu_id) 728 { 729 uint32_t evmm_reserved= 0; 730 #ifdef JLMDEBUG 731 bprint("start_64bit_mode_on_aps %u %u, cpu: %d\n", 732 stack_pointer, start_address, cpu_id); 733 #endif 734 __asm__ volatile ( 735 736 "\tcli\n" 737 738 // move start address to ebx for jump 739 "\tmovl %[start_address], %%ebx\n" 740 741 // initialize CR3 with PML4 base 742 "\tmovl %[evmm64_cr3], %%eax\n" 743 "\tmovl %%eax, %%cr3 \n" 744 745 // evmm_initial_stack is alread set, just 16 byte align it 746 // "movl %[stack_pointer], %%esp\n" 747 "\tandl $0xfffffff8, %%esp\n" 748 749 // prepare arguments for 64-bit mode 750 // there are 4 arguments (including reserved) 751 "\txor %%eax, %%eax\n" 752 "\tpush %%eax\n" 753 "\tpush %[evmm_reserved]\n" 754 "\tpush %%eax\n" 755 "\tpush %[evmm_p_a0]\n" 756 "\tpush %%eax\n" 757 "\tpush %[p_startup_struct]\n" 758 "\tpush %%eax\n" 759 "\tpush %[cpu_id]\n" 760 761 // enable 64-bit mode 762 // EFER MSR register 763 "\tmovl $0x0c0000080, %%ecx\n" 764 // read EFER into EAX 765 "\trdmsr\n" 766 767 // set EFER.LME=1 768 "\tbts $8, %%eax\n" 769 // set EFER.LMA=1 on BSP? 770 "\tbts $10, %%eax\n" 771 // write EFER 772 "\twrmsr\n" 773 774 // enable paging CR0.PG=1 775 "\tmovl %%cr0, %%eax\n" 776 "\tbts $31, %%eax\n" 777 "\tmovl %%eax, %%cr0\n" 778 779 // at this point we are in 32-bit compatibility mode 780 // LMA=1, CS.L=0, CS.D=1 781 // jump from 32bit compatibility mode into 64bit mode. 782 // mode switch 783 "ljmp $64, $1f\n" 784 785 "1:\n" 786 // in 64 bit this is actually pop rdi (cpiuid) 787 "\tpop %%edi\n" 788 // in 64 bit this is actually pop rsi (startup) 789 "\tpop %%esi\n" 790 // in 64 bit this is actually pop rdx (app struct) 791 "\tpop %%edx\n" 792 // in 64 bit this is actually pop rcx (reserved) 793 "\tpop %%ecx\n" 794 795 // "\tjmp .\n" // REMOVE! 796 "\tjmp *%%ebx\n" 797 "\tud2\n" 798 : 799 : [cpu_id] "g" (cpu_id), [p_startup_struct] "g" (p_startup_struct), 800 [evmm_p_a0] "g" (evmm_p_a0), [evmm_reserved] "g" (evmm_reserved), 801 [start_address] "g" (start_address), [segment] "g" (segment), 802 [stack_pointer] "m" (stack_pointer), [evmm64_cr3] "m" (evmm64_cr3) 803 : "%eax", "%ebx", "%ecx", "%edx"); 804 } 805 806 807 void init64_on_aps(uint32_t stack_pointer, INIT64_STRUCT *p_init64_data, 808 uint32_t start_address, uint32_t cpu_id) 809 { 810 #ifdef JLMDEBUG 811 bprint("init64_on_aps %u %u\n", stack_pointer, start_address); 812 #endif 813 // ia32_write_gdtr(&p_init64_data->i64_gdtr); 814 __asm__ volatile ( 815 "\tsgdt %[gdtr_64]\n" 816 :[gdtr_64] "=m" (gdtr_64) 817 ::); 818 819 write_cr3(evmm64_cr3); 820 write_cr4(evmm64_cr4); 821 start_64bit_mode_on_aps(stack_pointer, start_address, 822 p_init64_data->i64_cs, cpu_id); 823 #ifdef JLMDEBUG 824 // should never get here 825 bprint("init64_on_aps done\n"); 826 LOOP_FOREVER 827 #endif 828 } 829 830 831 // ------------------------------------------------------------------------- 832 833 834 // mbi and e820 support 835 836 837 #ifdef JLMDEBUG 838 void PrintMbi(const multiboot_info_t *mbi) 839 { 840 /* print mbi for debug */ 841 unsigned int i; 842 843 bprint("print mbi@%p ...\n", mbi); 844 bprint("\t flags: 0x%x\n", mbi->flags); 845 if ( mbi->flags & MBI_MEMLIMITS ) 846 bprint("\t mem_lower: %uKB, mem_upper: %uKB\n", mbi->mem_lower, 847 mbi->mem_upper); 848 if ( mbi->flags & MBI_BOOTDEV ) { 849 bprint("\t boot_device.bios_driver: 0x%x\n", 850 mbi->boot_device.bios_driver); 851 bprint("\t boot_device.top_level_partition: 0x%x\n", 852 mbi->boot_device.top_level_partition); 853 bprint("\t boot_device.sub_partition: 0x%x\n", 854 mbi->boot_device.sub_partition); 855 bprint("\t boot_device.third_partition: 0x%x\n", 856 mbi->boot_device.third_partition); 857 } 858 if ( mbi->flags & MBI_CMDLINE ) { 859 #define CHUNK_SIZE 72 860 #if 0 861 /* Break the command line up into 72 byte chunks */ 862 int cmdlen = strlen((char*)mbi->cmdline); 863 char *cmdptr = (char *)mbi->cmdline; 864 char chunk[CHUNK_SIZE+1]; 865 bprint("\t cmdline@0x%x: ", mbi->cmdline); 866 chunk[CHUNK_SIZE] = '\0'; 867 while (cmdlen > 0) { 868 vmm_strncpy(chunk, cmdptr, CHUNK_SIZE); 869 bprint("\n\t\"%s\"", chunk); 870 cmdptr += CHUNK_SIZE; 871 cmdlen -= CHUNK_SIZE; 872 } 873 #endif 874 bprint("\n"); 875 } 876 else { 877 bprint("no command line\n"); 878 } 879 880 if ( mbi->flags & MBI_MODULES ) { 881 bprint("\t mods_count: %u, mods_addr: 0x%x\n", mbi->mods_count, 882 mbi->mods_addr); 883 for ( i = 0; i < mbi->mods_count; i++ ) { 884 module_t *p = (module_t *)(mbi->mods_addr + i*sizeof(module_t)); 885 bprint("\t %d : mod_start: 0x%x, mod_end: 0x%x\n", i, 886 p->mod_start, p->mod_end); 887 bprint("\t string (@0x%x): \"%s\"\n", p->string, 888 (char *)p->string); 889 } 890 } 891 if ( mbi->flags & MBI_AOUT ) { 892 const aout_t *p = &(mbi->syms.aout_image); 893 bprint("\t aout :: tabsize: 0x%x, strsize: 0x%x, addr: 0x%x\n", 894 p->tabsize, p->strsize, p->addr); 895 } 896 if ( mbi->flags & MBI_ELF ) { 897 const elf_t *p = &(mbi->syms.elf_image); 898 bprint("\t elf :: num: %u, size: 0x%x, addr: 0x%x, shndx: 0x%x\n", 899 p->num, p->size, p->addr, p->shndx); 900 } 901 if ( mbi->flags & MBI_MEMMAP ) { 902 memory_map_t *p; 903 bprint("\t mmap_length: 0x%x, mmap_addr: 0x%x\n", mbi->mmap_length, 904 mbi->mmap_addr); 905 for ( p = (memory_map_t *)mbi->mmap_addr; 906 (uint32_t)p < mbi->mmap_addr + mbi->mmap_length; 907 p=(memory_map_t *)((uint32_t)p + p->size + sizeof(p->size)) ) { 908 bprint("\t size: 0x%x, base_addr: 0x%04x%04x, " 909 "length: 0x%04x%04x, type: %u\n", p->size, 910 p->base_addr_high, p->base_addr_low, 911 p->length_high, p->length_low, p->type); 912 } 913 } 914 if ( mbi->flags & MBI_DRIVES ) { 915 bprint("\t drives_length: %u, drives_addr: 0x%x\n", mbi->drives_length, 916 mbi->drives_addr); 917 } 918 if ( mbi->flags & MBI_CONFIG ) { 919 bprint("\t config_table: 0x%x\n", mbi->config_table); 920 } 921 if ( mbi->flags & MBI_BTLDNAME ) { 922 bprint("\t boot_loader_name@0x%x: %s\n", 923 mbi->boot_loader_name, (char *)mbi->boot_loader_name); 924 } 925 if ( mbi->flags & MBI_APM ) { 926 bprint("\t apm_table: 0x%x\n", mbi->apm_table); 927 } 928 if ( mbi->flags & MBI_VBE ) { 929 bprint("\t vbe_control_info: 0x%x\n" 930 "\t vbe_mode_info: 0x%x\n" 931 "\t vbe_mode: 0x%x\n" 932 "\t vbe_interface_seg: 0x%x\n" 933 "\t vbe_interface_off: 0x%x\n" 934 "\t vbe_interface_len: 0x%x\n", 935 mbi->vbe_control_info, 936 mbi->vbe_mode_info, 937 mbi->vbe_mode, 938 mbi->vbe_interface_seg, 939 mbi->vbe_interface_off, 940 mbi->vbe_interface_len 941 ); 942 } 943 } 944 #endif 945 946 947 module_t *get_module(const multiboot_info_t *mbi, unsigned int i) 948 { 949 if ( mbi == NULL ) { 950 bprint("Error: mbi pointer is zero.\n"); 951 return NULL; 952 } 953 954 if ( i >= mbi->mods_count ) { 955 bprint("invalid module #\n"); 956 return NULL; 957 } 958 959 return (module_t *)(mbi->mods_addr + i * sizeof(module_t)); 960 } 961 962 963 unsigned long max(unsigned long a, unsigned long b) 964 { 965 if(b>a) 966 return b; 967 return a; 968 } 969 970 971 unsigned long get_mbi_mem_end(const multiboot_info_t *mbi) 972 { 973 unsigned long end = (unsigned long)(mbi + 1); 974 975 if ( mbi->flags & MBI_CMDLINE ) 976 end = max(end, mbi->cmdline + vmm_strlen((char *)mbi->cmdline) + 1); 977 if ( mbi->flags & MBI_MODULES ) { 978 end = max(end, mbi->mods_addr + mbi->mods_count * sizeof(module_t)); 979 unsigned int i; 980 for ( i = 0; i < mbi->mods_count; i++ ) { 981 module_t *p = get_module(mbi, i); 982 end = max(end, p->string + vmm_strlen((char *)p->string) + 1); 983 } 984 } 985 if ( mbi->flags & MBI_AOUT ) { 986 const aout_t *p = &(mbi->syms.aout_image); 987 end = max(end, p->addr + p->tabsize 988 + sizeof(unsigned long) + p->strsize); 989 } 990 if ( mbi->flags & MBI_ELF ) { 991 const elf_t *p = &(mbi->syms.elf_image); 992 end = max(end, p->addr + p->num * p->size); 993 } 994 if ( mbi->flags & MBI_MEMMAP ) 995 end = max(end, mbi->mmap_addr + mbi->mmap_length); 996 if ( mbi->flags & MBI_DRIVES ) 997 end = max(end, mbi->drives_addr + mbi->drives_length); 998 // mbi->config_table field should contain 999 // "the address of the rom configuration table returned by the 1000 // GET CONFIGURATION bios call", so skip it 1001 if ( mbi->flags & MBI_BTLDNAME ) 1002 end = max(end, mbi->boot_loader_name 1003 + vmm_strlen((char *)mbi->boot_loader_name) + 1); 1004 if ( mbi->flags & MBI_APM ) 1005 end = max(end, mbi->apm_table + 20); 1006 if ( mbi->flags & MBI_VBE ) { 1007 end = max(end, mbi->vbe_control_info + 512); 1008 end = max(end, mbi->vbe_mode_info + 256); 1009 } 1010 1011 return PAGE_UP(end); 1012 } 1013 1014 1015 // ------------------------------------------------------------------------- 1016 1017 1018 // linux memory initialization and guest setup 1019 1020 1021 // Linux data layout 1022 // Start of linux data area <-- evmm_heap_base - 3*PAGE_SIZE 1023 // boot_parameters <- offset 0 1024 // command line <- offset sizeof(boot_params_t) 1025 // 4K stack <-- evmm_heap_base - PAGE_SIZE 1026 // evmm_heap_base <-- where linux_esp_register will point 1027 int allocate_linux_data() 1028 { 1029 // setup linux stack 1030 linux_stack_base = evmm_heap_base-PAGE_SIZE; 1031 linux_stack_size= PAGE_SIZE; 1032 vmm_memset((void*)linux_stack_base, 0, PAGE_SIZE); 1033 1034 // linux data area 1035 #ifdef LOWBOOTPARAMS 1036 linux_boot_parameters= 0x20000; 1037 #else 1038 linux_boot_parameters= (linux_stack_base-linux_stack_size-2*PAGE_SIZE); 1039 #endif 1040 vmm_memset((void*)linux_boot_parameters, 0, 2*PAGE_SIZE); 1041 return 0; 1042 } 1043 1044 1045 #ifndef Ia32VmxVmcsGuestSleepStateWaitForSipi 1046 // defined in vmx_vmcs.h 1047 #define Ia32VmxVmcsGuestSleepStateWaitForSipi 3 1048 #endif 1049 1050 void setup_real_mode(VMM_GUEST_CPU_STARTUP_STATE *s) 1051 { 1052 s->size_of_this_struct = sizeof(VMM_GUEST_CPU_STARTUP_STATE); 1053 s->version_of_this_struct = VMM_GUEST_CPU_STARTUP_STATE_VERSION; 1054 s->msr.msr_sysenter_cs = tboot_msr_sysenter_cs; 1055 s->msr.msr_sysenter_eip = tboot_msr_sysenter_eip; 1056 s->msr.msr_sysenter_esp = tboot_msr_sysenter_esp; 1057 s->msr.msr_efer = tboot_msr_efer; 1058 s->msr.msr_pat = tboot_msr_pat; 1059 s->msr.msr_debugctl = tboot_msr_debugctl; 1060 s->msr.pending_exceptions = 0; 1061 s->msr.interruptibility_state = 0; 1062 s->msr.activity_state = 0; 1063 s->msr.smbase = 0; 1064 s->control.gdtr.base = (UINT64)0; 1065 s->control.gdtr.limit = (UINT32)0xffff; 1066 s->control.idtr.base = 0x0; 1067 s->control.idtr.limit = 0xffff; 1068 s->control.cr[IA32_CTRL_CR0] = 0; 1069 s->control.cr[IA32_CTRL_CR2] = 0; 1070 s->control.cr[IA32_CTRL_CR3] = 0; 1071 s->control.cr[IA32_CTRL_CR4] = 0; 1072 s->seg.segment[IA32_SEG_LDTR].attributes = 0x00010000; 1073 s->seg.segment[IA32_SEG_CS].base = 0; 1074 s->seg.segment[IA32_SEG_CS].limit = 0xffff; 1075 s->seg.segment[IA32_SEG_CS].attributes = 0x93; 1076 s->seg.segment[IA32_SEG_DS].base = 0; 1077 s->seg.segment[IA32_SEG_DS].limit = 0xffff; 1078 s->seg.segment[IA32_SEG_DS].attributes = 0x93; 1079 s->seg.segment[IA32_SEG_ES].base = 0; 1080 s->seg.segment[IA32_SEG_ES].limit = 0xffff; 1081 s->seg.segment[IA32_SEG_ES].attributes = 0x93; 1082 s->seg.segment[IA32_SEG_FS].base = 0; 1083 s->seg.segment[IA32_SEG_FS].limit = 0xffff; 1084 s->seg.segment[IA32_SEG_FS].attributes = 0x93; 1085 s->seg.segment[IA32_SEG_GS].base = 0; 1086 s->seg.segment[IA32_SEG_GS].limit = 0xffff; 1087 s->seg.segment[IA32_SEG_GS].attributes = 0x93; 1088 s->seg.segment[IA32_SEG_SS].base = 0; 1089 s->seg.segment[IA32_SEG_SS].limit = 0xffff; 1090 s->seg.segment[IA32_SEG_SS].attributes = 0x93; 1091 s->gp.reg[IA32_REG_RDX] = 0x0080; // boot from hd0 1092 s->gp.reg[IA32_REG_RIP] = 0x2000; // grub stage 1.5 buffer 1093 s->gp.reg[IA32_REG_RSP] = 0x2000; // grub stage 1.5 buffer 1094 s->gp.reg[IA32_REG_RFLAGS] &= 0xfffffdff; 1095 return; 1096 } 1097 1098 1099 int linux_setup(void) 1100 { 1101 uint32_t i; 1102 IA32_IDTR idtr; 1103 // stack grows down, BSP only 1104 linux_esp_register= linux_stack_base+PAGE_SIZE; 1105 1106 // guest state from table 1107 guest_gdtr.base= (uint32_t)&guest_gdt[0]; 1108 guest_gdtr.limit= 63; 1109 guest_cs_selector= 0x10; 1110 guest_cs_base= 0; 1111 guest_cs_limit= 0xffffffff; 1112 guest_cs_attr= 0xc09b; 1113 guest_ds_selector= 0x18; 1114 guest_ds_base= 0; 1115 guest_ds_limit= 0xffffffff; 1116 guest_ds_attr= 0xc093; 1117 guest_ss_selector= 0x18; 1118 guest_ss_base= 0; 1119 guest_ss_limit= 0xffffffff; 1120 guest_ss_attr= 0xc093; 1121 guest_tr_selector= 0x20; 1122 guest_tr_base= 0; 1123 guest_tr_limit= 0x0000ffff; 1124 guest_tr_attr= 0x0000808b; 1125 1126 guest_msr_debugctl= tboot_msr_debugctl; 1127 guest_msr_efer= tboot_msr_efer; 1128 guest_msr_pat= tboot_msr_pat; 1129 guest_msr_sysenter_cs= tboot_msr_sysenter_cs; 1130 guest_msr_sysenter_esp= tboot_msr_sysenter_esp; 1131 guest_msr_sysenter_eip= tboot_msr_sysenter_eip; 1132 1133 guest_cr0= tboot_cr0; 1134 guest_cr2= tboot_cr2; 1135 guest_cr3= 0; 1136 guest_cr4= tboot_cr4; 1137 1138 // AP's are in real mode waiting for sipi (halt state) 1139 // Set the VMCS GUEST ACTIVITY STATE in the VMCS Guest State Area 1140 // to "halted." 1141 // Bootstrap processor is 0 1142 int k; 1143 for(k=0; k<=evmm_num_of_aps; k++) { 1144 if(k>0) { 1145 // setup_real_mode(&guest_processor_state[k]); 1146 continue; 1147 } 1148 guest_processor_state[k].size_of_this_struct = 1149 sizeof(VMM_GUEST_CPU_STARTUP_STATE); 1150 guest_processor_state[k].version_of_this_struct = 1151 VMM_GUEST_CPU_STARTUP_STATE_VERSION; 1152 guest_processor_state[k].reserved_1 = 0; 1153 1154 // zero out all the registers. Then set the ones that linux expects. 1155 for (i = 0; i < IA32_REG_GP_COUNT; i++) { 1156 guest_processor_state[k].gp.reg[i]= (uint64_t) 0; 1157 } 1158 1159 guest_processor_state[k].gp.reg[IA32_REG_RIP]= (uint64_t)linux_entry_address; 1160 guest_processor_state[k].gp.reg[IA32_REG_RDI]= (uint64_t) linux_edi_register; 1161 guest_processor_state[k].gp.reg[IA32_REG_RSI]= (uint64_t) linux_edi_register; 1162 guest_processor_state[k].gp.reg[IA32_REG_RSP]= (uint64_t) 1163 evmm_stack_pointers_array[k]; 1164 guest_processor_state[k].gp.reg[IA32_REG_RFLAGS] = 0x02; 1165 guest_processor_state[k].gp.reg[IA32_REG_RBP] = 0; 1166 1167 for (i = 0; i < IA32_REG_XMM_COUNT; i++) { 1168 guest_processor_state[k].xmm.reg[i].uint64[0] = (uint64_t)0; 1169 guest_processor_state[k].xmm.reg[i].uint64[1] = (uint64_t)0; 1170 } 1171 guest_processor_state[k].msr.msr_debugctl= guest_msr_debugctl; 1172 guest_processor_state[k].msr.msr_efer= guest_msr_efer; 1173 guest_processor_state[k].msr.msr_pat= guest_msr_pat; 1174 guest_processor_state[k].msr.msr_sysenter_esp= guest_msr_sysenter_esp; 1175 guest_processor_state[k].msr.msr_sysenter_eip= guest_msr_sysenter_eip; 1176 guest_processor_state[k].msr.msr_sysenter_cs= guest_msr_sysenter_cs; 1177 guest_processor_state[k].msr.pending_exceptions = 0; 1178 guest_processor_state[k].msr.interruptibility_state = 0; 1179 guest_processor_state[k].msr.activity_state = 0; 1180 guest_processor_state[k].msr.smbase = 0; 1181 1182 for (i = 0; i < IA32_CTRL_COUNT; i++) { 1183 guest_processor_state[k].control.cr[i] = 0; 1184 } 1185 guest_processor_state[k].control.cr[IA32_CTRL_CR0]= (uint64_t) guest_cr0; 1186 guest_processor_state[k].control.cr[IA32_CTRL_CR2]= (uint64_t) guest_cr2; 1187 guest_processor_state[k].control.cr[IA32_CTRL_CR3] = (uint64_t) guest_cr3; 1188 guest_processor_state[k].control.cr[IA32_CTRL_CR4]= (uint64_t) guest_cr4; 1189 1190 for (i = 0; i < IA32_SEG_COUNT; i++) { 1191 guest_processor_state[k].seg.segment[i].base = 0; 1192 guest_processor_state[k].seg.segment[i].limit = 0; 1193 } 1194 1195 guest_processor_state[k].control.gdtr.base = (uint64_t)(uint32_t) 1196 guest_gdtr.base; 1197 guest_processor_state[k].control.gdtr.limit = (uint64_t)(uint32_t) 1198 guest_gdtr.limit; 1199 1200 __asm__ volatile ( 1201 "\tsgdt %[idtr]\n" 1202 :[idtr] "=m" (idtr) 1203 ::); 1204 1205 guest_processor_state[k].control.idtr.base = (UINT64)idtr.base; 1206 guest_processor_state[k].control.idtr.limit = (UINT32)idtr.limit; 1207 1208 guest_processor_state[k].seg.segment[IA32_SEG_CS].selector = 1209 (uint64_t) guest_cs_selector; 1210 guest_processor_state[k].seg.segment[IA32_SEG_DS].selector = 1211 (uint64_t) guest_ds_selector; 1212 guest_processor_state[k].seg.segment[IA32_SEG_SS].selector = 1213 (uint64_t) guest_ss_selector; 1214 guest_processor_state[k].seg.segment[IA32_SEG_ES].selector = 1215 (uint64_t) guest_ds_selector; 1216 guest_processor_state[k].seg.segment[IA32_SEG_FS].selector = 1217 (uint64_t) guest_ds_selector; 1218 guest_processor_state[k].seg.segment[IA32_SEG_GS].selector = 1219 (uint64_t) guest_ds_selector; 1220 guest_processor_state[k].seg.segment[IA32_SEG_TR].selector= 1221 (uint64_t) guest_tr_selector; 1222 1223 guest_processor_state[k].seg.segment[IA32_SEG_CS].base = guest_cs_base; 1224 guest_processor_state[k].seg.segment[IA32_SEG_DS].base = guest_ds_base; 1225 guest_processor_state[k].seg.segment[IA32_SEG_SS].base = guest_ss_base; 1226 guest_processor_state[k].seg.segment[IA32_SEG_ES].base = guest_ds_base; 1227 guest_processor_state[k].seg.segment[IA32_SEG_FS].base = guest_ds_base; 1228 guest_processor_state[k].seg.segment[IA32_SEG_GS].base = guest_ds_base; 1229 guest_processor_state[k].seg.segment[IA32_SEG_TR].base = guest_ds_base; 1230 1231 guest_processor_state[k].seg.segment[IA32_SEG_CS].limit = guest_cs_limit; 1232 guest_processor_state[k].seg.segment[IA32_SEG_DS].limit = guest_ds_limit; 1233 guest_processor_state[k].seg.segment[IA32_SEG_SS].limit = guest_ss_limit; 1234 guest_processor_state[k].seg.segment[IA32_SEG_ES].limit = guest_ds_limit; 1235 guest_processor_state[k].seg.segment[IA32_SEG_FS].limit = guest_ds_limit; 1236 guest_processor_state[k].seg.segment[IA32_SEG_GS].limit = guest_ds_limit; 1237 guest_processor_state[k].seg.segment[IA32_SEG_TR].limit = guest_tr_limit; 1238 1239 guest_processor_state[k].seg.segment[IA32_SEG_CS].attributes = guest_cs_attr; 1240 guest_processor_state[k].seg.segment[IA32_SEG_DS].attributes = guest_ds_attr; 1241 guest_processor_state[k].seg.segment[IA32_SEG_SS].attributes = guest_ss_attr; 1242 guest_processor_state[k].seg.segment[IA32_SEG_ES].attributes = guest_ds_attr; 1243 guest_processor_state[k].seg.segment[IA32_SEG_FS].attributes = guest_ds_attr; 1244 guest_processor_state[k].seg.segment[IA32_SEG_GS].attributes = guest_ds_attr; 1245 guest_processor_state[k].seg.segment[IA32_SEG_TR].attributes = guest_tr_attr; 1246 1247 guest_processor_state[k].seg.segment[IA32_SEG_LDTR].selector= 0x0; 1248 guest_processor_state[k].seg.segment[IA32_SEG_LDTR].base= 0x0; 1249 guest_processor_state[k].seg.segment[IA32_SEG_LDTR].limit= 0xffffffff; 1250 guest_processor_state[k].seg.segment[IA32_SEG_LDTR].attributes= 0x00010000; 1251 } 1252 return 0; 1253 } 1254 1255 1256 // This builds the 24 byte extended e820 table 1257 static uint64_t evmm_get_e820_table(const multiboot_info_t *mbi) 1258 { 1259 uint32_t entry_offset = 0; 1260 int i= 0; 1261 1262 evmm_e820 = (INT15_E820_MEMORY_MAP *)evmm_page_alloc(1); 1263 if (evmm_e820 == NULL) 1264 return (uint64_t)-1; 1265 1266 while ( entry_offset < mbi->mmap_length ) { 1267 memory_map_t *entry = (memory_map_t *) 1268 (mbi->mmap_addr + entry_offset); 1269 evmm_e820->memory_map_entry[i].basic_entry.base_address = 1270 (((uint64_t)entry->base_addr_high)<<32)+entry->base_addr_low; 1271 evmm_e820->memory_map_entry[i].basic_entry.length = 1272 (((uint64_t)entry->length_high)<<32)+entry->length_low; 1273 evmm_e820->memory_map_entry[i].basic_entry.address_range_type= 1274 entry->type; 1275 evmm_e820->memory_map_entry[i].extended_attributes.uint32 = 1; 1276 i++; 1277 entry_offset += entry->size + sizeof(entry->size); 1278 } 1279 evmm_num_e820_entries = i; 1280 evmm_e820->memory_map_size = i*sizeof(INT15_E820_MEMORY_MAP_ENTRY_EXT); 1281 evmm_start_of_e820_table = (uint64_t)(uint32_t)evmm_e820; 1282 1283 return evmm_start_of_e820_table; 1284 } 1285 1286 1287 void linux_parse_cmdline(const char *cmdline) 1288 { 1289 cmdline_parse(cmdline, linux_cmdline_options, linux_param_values); 1290 } 1291 1292 1293 int get_linux_vga(int *vid_mode) 1294 { 1295 const char *vga = get_option_val(linux_cmdline_options, 1296 linux_param_values, "vga"); 1297 if ( vga == NULL || vid_mode == NULL ) 1298 return 1; 1299 if ( vmm_strcmp(vga, "normal") == 0 ) 1300 *vid_mode = 0xFFFF; 1301 else if ( vmm_strcmp(vga, "ext") == 0 ) 1302 *vid_mode = 0xFFFE; 1303 else if ( vmm_strcmp(vga, "ask") == 0 ) 1304 *vid_mode = 0xFFFD; 1305 else 1306 *vid_mode = vmm_strtoul(vga, NULL, 0); 1307 return 0; 1308 } 1309 1310 1311 unsigned long get_bootstrap_mem_end(void) 1312 { 1313 return PAGE_UP(bootstrap_end); 1314 } 1315 1316 1317 static inline bool plus_overflow_u32(uint32_t x, uint32_t y) 1318 { 1319 return ((((uint32_t)(~0)) - x) < y); 1320 } 1321 1322 1323 // expand linux kernel with kernel image and initrd image 1324 int expand_linux_image( multiboot_info_t* mbi, 1325 uint32_t linux_image, uint32_t linux_size, 1326 uint32_t initrd_image, uint32_t initrd_size, 1327 uint32_t* entry_point) 1328 { 1329 linux_kernel_header_t *hdr; 1330 uint32_t real_mode_base, protected_mode_base; 1331 unsigned long real_mode_size, protected_mode_size; 1332 // Note: real_mode_size + protected_mode_size = linux_size 1333 uint32_t initrd_base; 1334 int vid_mode = 0; 1335 boot_params_t* boot_params; 1336 1337 // sanity check 1338 if ( linux_image == 0) { 1339 bprint("Error: Linux kernel image is zero.\n"); 1340 return 1; 1341 } 1342 if ( linux_size == 0 ) { 1343 bprint("Error: Linux kernel size is zero.\n"); 1344 return 1; 1345 } 1346 1347 if ( linux_size < sizeof(linux_kernel_header_t) ) { 1348 bprint("Error: Linux kernel size is too small.\n"); 1349 return 1; 1350 } 1351 hdr = (linux_kernel_header_t *)(linux_image + KERNEL_HEADER_OFFSET); 1352 if ( hdr == NULL ) { 1353 bprint("Error: Linux kernel header is zero.\n"); 1354 return 1; 1355 } 1356 if ( entry_point == NULL ) { 1357 bprint("Error: Output pointer is zero.\n"); 1358 return 1; 1359 } 1360 1361 // recommended layout 1362 // 0x0000 - 0x7FFF Real mode kernel 1363 // 0x8000 - 0x8FFF Stack and heap 1364 // 0x9000 - 0x90FF Kernel command line 1365 1366 // if setup_sects is zero, set to default value 4 1367 if ( hdr->setup_sects == 0 ) 1368 hdr->setup_sects = DEFAULT_SECTOR_NUM; 1369 if ( hdr->setup_sects > MAX_SECTOR_NUM ) { 1370 bprint("Error: Linux setup sectors %d exceed maximum limitation 64.\n", 1371 hdr->setup_sects); 1372 return 1; 1373 } 1374 1375 // set vid_mode 1376 linux_parse_cmdline(linux_command_line); 1377 if (get_linux_vga(&vid_mode)) 1378 hdr->vid_mode = vid_mode; 1379 1380 // compare to the magic number 1381 if ( hdr->header != HDRS_MAGIC ) { 1382 bprint("Error: Old kernel (< 2.6.20) is not supported by tboot.\n"); 1383 return 1; 1384 } 1385 if ( hdr->version < 0x0205 ) { 1386 bprint("Error: Old kernel (<2.6.20) is not supported by tboot.\n"); 1387 return 1; 1388 } 1389 // boot loader is grub, set type_of_loader to 0x7 1390 hdr->type_of_loader = LOADER_TYPE_GRUB; 1391 1392 // set loadflags and heap_end_ptr 1393 hdr->loadflags |= FLAG_CAN_USE_HEAP; // can use heap 1394 hdr->heap_end_ptr = KERNEL_CMDLINE_OFFSET - BOOT_SECTOR_OFFSET; 1395 1396 // load initrd and set ramdisk_image and ramdisk_size 1397 // The initrd should typically be located as high in memory as 1398 // possible, as it may otherwise get overwritten by the early 1399 // kernel initialization sequence. 1400 uint64_t mem_limit = TOTAL_MEM; 1401 1402 uint64_t max_ram_base, max_ram_size; 1403 get_highest_sized_ram(initrd_size, mem_limit, 1404 &max_ram_base, &max_ram_size); 1405 if ( max_ram_size == 0 ) { 1406 bprint("not enough RAM for initrd\n"); 1407 return 1; 1408 } 1409 if ( initrd_size > max_ram_size ) { 1410 bprint("initrd_size is too large\n"); 1411 return 1; 1412 } 1413 if ( max_ram_base > ((uint64_t)(uint32_t)(~0)) ) { 1414 bprint("max_ram_base is too high\n"); 1415 return 1; 1416 } 1417 initrd_base = (max_ram_base + max_ram_size - initrd_size) & PAGE_MASK; 1418 1419 // should not exceed initrd_addr_max 1420 if ( (initrd_base + initrd_size) > hdr->initrd_addr_max ) { 1421 if ( hdr->initrd_addr_max < initrd_size ) { 1422 bprint("initrd_addr_max is too small\n"); 1423 return 1; 1424 } 1425 initrd_base = hdr->initrd_addr_max - initrd_size; 1426 initrd_base = initrd_base & PAGE_MASK; 1427 } 1428 1429 vmm_memcpy ((void *)initrd_base, (void*)initrd_image, initrd_size); 1430 1431 hdr->ramdisk_image = initrd_base; 1432 hdr->ramdisk_size = initrd_size; 1433 1434 // calc location of real mode part 1435 // FIX (JLM) TBOOT defines 1436 real_mode_base = LEGACY_REAL_START; 1437 if ( mbi->flags & MBI_MEMLIMITS ) 1438 real_mode_base = (mbi->mem_lower << 10) - REAL_MODE_SIZE; 1439 if ( real_mode_base < TBOOT_KERNEL_CMDLINE_ADDR + 1440 TBOOT_KERNEL_CMDLINE_SIZE ) 1441 real_mode_base = TBOOT_KERNEL_CMDLINE_ADDR + 1442 TBOOT_KERNEL_CMDLINE_SIZE; 1443 if ( real_mode_base > LEGACY_REAL_START ) 1444 real_mode_base = LEGACY_REAL_START; 1445 real_mode_size = (hdr->setup_sects + 1) * SECTOR_SIZE; 1446 if ( real_mode_size + sizeof(boot_params_t) > KERNEL_CMDLINE_OFFSET ) { 1447 bprint("realmode data is too large\n"); 1448 return 1; 1449 } 1450 1451 // calc location of protected mode part 1452 protected_mode_size = linux_size - real_mode_size; 1453 1454 // if kernel is relocatable then move it above tboot 1455 // else it may expand over top of tboot 1456 if ( hdr->relocatable_kernel ) { 1457 #if 1 1458 protected_mode_base = 0x02000000; 1459 hdr->loadflags|= 0x40; // keep segments 1460 #else 1461 bprint("relocatable kernel\n"); 1462 protected_mode_base = (uint32_t)get_bootstrap_mem_end(); 1463 /* fix possible mbi overwrite in grub2 case */ 1464 /* assuming grub2 only used for relocatable kernel */ 1465 /* assuming mbi & components are contiguous */ 1466 unsigned long mbi_end = get_mbi_mem_end(mbi); 1467 if ( mbi_end > protected_mode_base ) 1468 protected_mode_base = mbi_end; 1469 // overflow? 1470 if ( plus_overflow_u32(protected_mode_base, 1471 hdr->kernel_alignment - 1) ) { 1472 bprint("protected_mode_base overflows\n"); 1473 return 1; 1474 } 1475 // round it up to kernel alignment 1476 protected_mode_base= (protected_mode_base+hdr->kernel_alignment-1) 1477 & ~(hdr->kernel_alignment-1); 1478 #endif 1479 hdr->code32_start = protected_mode_base; 1480 } 1481 else if ( hdr->loadflags & FLAG_LOAD_HIGH ) { 1482 protected_mode_base = LINUX_DEFAULT_LOAD_ADDRESS; // bzImage:0x100000 1483 if ( plus_overflow_u32(protected_mode_base, protected_mode_size) ) { 1484 bprint("protected_mode_base+protected_mode_size overflows\n"); 1485 return 1; 1486 } 1487 // Check: protected mode part cannot exceed mem_upper 1488 if ( mbi->flags & MBI_MEMLIMITS ) 1489 if ( (protected_mode_base + protected_mode_size) 1490 > ((mbi->mem_upper << 10) + 0x100000) ) { 1491 bprint("Error: Linux protected mode part (0x%lx ~ 0x%lx) " 1492 "exceeds mem_upper (0x%lx ~ 0x%lx).\n", 1493 (unsigned long)protected_mode_base, 1494 (unsigned long)(protected_mode_base + protected_mode_size), 1495 (unsigned long)0x100000, 1496 (unsigned long)((mbi->mem_upper << 10) + 0x100000)); 1497 return 1; 1498 } 1499 } 1500 else { 1501 bprint("Error: Linux protected mode not loaded high\n"); 1502 return 1; 1503 } 1504 1505 // set cmd_line_ptr 1506 hdr->cmd_line_ptr = real_mode_base + KERNEL_CMDLINE_OFFSET; 1507 1508 // load protected-mode part 1509 vmm_memcpy((void *)protected_mode_base, 1510 (void*)(linux_image + real_mode_size), 1511 protected_mode_size); 1512 #ifdef JLMDEBUG1 1513 bprint("Kernel (protected mode) from 0x%lx to 0x%lx\n", 1514 (unsigned long)protected_mode_base, 1515 (unsigned long)(protected_mode_base + protected_mode_size)); 1516 #endif 1517 1518 // load real-mode part 1519 vmm_memcpy((void *)real_mode_base, (void*)linux_image, real_mode_size); 1520 #ifdef JLMDEBUG1 1521 bprint("Kernel (real mode) from 0x%lx to 0x%lx\n", 1522 (unsigned long)real_mode_base, 1523 (unsigned long)(real_mode_base + real_mode_size)); 1524 #endif 1525 1526 // copy cmdline 1527 if ( mbi->flags & MBI_CMDLINE ) { 1528 const char *kernel_cmdline = skip_filename((const char *)mbi->cmdline); 1529 vmm_memcpy((void *)hdr->cmd_line_ptr, kernel_cmdline, 1530 vmm_strlen((const char*)kernel_cmdline)); 1531 } 1532 1533 // need to put boot_params in real mode area so it gets mapped 1534 boot_params = (boot_params_t *)(real_mode_base + real_mode_size); 1535 vmm_memset(boot_params, 0, sizeof(*boot_params)); 1536 vmm_memcpy(&boot_params->hdr, hdr, sizeof(*hdr)); 1537 1538 // copy e820 table to boot parameters 1539 if ( mbi->flags & MBI_MEMMAP ) { 1540 int i; 1541 1542 memory_map_t *p = (memory_map_t *)mbi->mmap_addr; 1543 for ( i = 0; (uint32_t)p < mbi->mmap_addr + mbi->mmap_length; i++ ) { 1544 boot_params->e820_map[i].addr = ((uint64_t)p->base_addr_high << 32) 1545 | (uint64_t)p->base_addr_low; 1546 boot_params->e820_map[i].size = ((uint64_t)p->length_high << 32) 1547 | (uint64_t)p->length_low; 1548 boot_params->e820_map[i].type = p->type; 1549 p = (void *)p + p->size + sizeof(p->size); 1550 } 1551 boot_params->e820_entries = i; 1552 } 1553 1554 screen_info_t *screen = (screen_info_t *)&boot_params->screen_info; 1555 screen->orig_video_mode = 3; // BIOS 80*25 text mode 1556 screen->orig_video_lines = 25; 1557 screen->orig_video_cols = 80; 1558 screen->orig_video_points = 16; // set font height to 16 pixels 1559 screen->orig_video_isVGA = 1; // use VGA text screen setups 1560 screen->orig_y = 24; // last line of screen 1561 linux_original_boot_parameters= (uint32_t) boot_params; 1562 linux_real_mode_start= real_mode_base; 1563 linux_real_mode_size= real_mode_size; 1564 linux_protected_mode_start= protected_mode_base; 1565 linux_protected_mode_size= protected_mode_size; 1566 initram_start_address= initrd_base; 1567 *entry_point = hdr->code32_start; 1568 return 0; 1569 } 1570 1571 1572 #ifdef INCLUDETBOOTCMDLINE 1573 int adjust_kernel_cmdline(multiboot_info_t *mbi, 1574 const void *tboot_shared_addr) 1575 { 1576 const char *old_cmdline; 1577 1578 if ( mbi->flags & MBI_CMDLINE && mbi->cmdline != 0 ) 1579 old_cmdline = (const char *)mbi->cmdline; 1580 else 1581 old_cmdline = ""; 1582 1583 vscnprintf(new_cmdline, TBOOT_KERNEL_CMDLINE_SIZE, "%s tboot=%p", 1584 old_cmdline, tboot_shared_addr); 1585 new_cmdline[TBOOT_KERNEL_CMDLINE_SIZE - 1] = '\0'; 1586 1587 mbi->cmdline = (u32)new_cmdline; 1588 mbi->flags |= MBI_CMDLINE; 1589 1590 return 0; 1591 } 1592 #endif 1593 1594 1595 // relocate and setup variables for evmm entry 1596 int prepare_primary_guest_args(multiboot_info_t *mbi) 1597 { 1598 (void)mbi; 1599 if(linux_original_boot_parameters==0) { 1600 bprint("original boot parameters not set\n"); 1601 return 1; 1602 } 1603 if(linux_boot_parameters==0) { 1604 bprint("linux boot parameter area fails\n"); 1605 LOOP_FOREVER 1606 } 1607 vmm_memcpy((void*)linux_boot_parameters, (void*)linux_original_boot_parameters, 1608 sizeof(boot_params_t)); 1609 boot_params_t* new_boot_params= (boot_params_t*)linux_boot_parameters; 1610 new_boot_params->e820_entries= g_nr_map; 1611 1612 // set address of copied tboot shared page 1613 #ifdef TBOOT_SHARED_PAGE 1614 *(uint64_t *)(new_boot_params->tboot_shared_addr)= (uint32_t)shared_page; 1615 #else 1616 *(uint64_t *)(new_boot_params->tboot_shared_addr)= 0; 1617 #endif 1618 1619 // copy command line after boot parameters 1620 new_cmdline= (char*)(linux_boot_parameters+sizeof(boot_params_t)); 1621 #ifdef INCLUDETBOOTCMDLINE 1622 if(adjust_kernel_cmdline(mbi, (const void*)new_boot_params->tboot_shared_addr)!=0) { 1623 bprint("cant adjust linux command line\n"); 1624 LOOP_FOREVER 1625 } 1626 #else 1627 if(linux_command_line!=0) { 1628 vmm_memcpy((void*)new_cmdline, (void*) linux_command_line, 1629 vmm_strlen((char*)linux_command_line)+1); 1630 new_boot_params->hdr.cmdline_size= vmm_strlen(new_cmdline)+1; 1631 new_boot_params->hdr.cmd_line_ptr= (uint32_t) new_cmdline; 1632 } 1633 else { 1634 new_cmdline= NULL; 1635 new_boot_params->hdr.cmdline_size= 0; 1636 new_boot_params->hdr.cmd_line_ptr= (uint32_t) 0; 1637 } 1638 #endif 1639 // set edi register 1640 linux_edi_register= linux_boot_parameters; 1641 #ifdef JLMDEBUG1 // TEST1 1642 bprint("edi reg: %08x\n", linux_edi_register); 1643 bprint("cmd_ptr: %08x, code32_start: %08x\n", new_boot_params->hdr.cmd_line_ptr, 1644 new_boot_params->hdr.code32_start); 1645 LOOP_FOREVER 1646 #endif 1647 return 0; 1648 } 1649 1650 1651 int prepare_linux_image_for_evmm(multiboot_info_t *mbi) 1652 { 1653 if (linux_start==0 || initram_start==0) { 1654 bprint("bad linux image or initram image\n"); 1655 return 1; 1656 } 1657 1658 // make linux mbi 1659 // get correct command line and correct mbi 1660 vmm_memset((void*) &linux_mbi, 0, sizeof(multiboot_info_t)); 1661 vmm_memcpy((void*) &linux_mbi, (void*)mbi, sizeof(multiboot_info_t)); 1662 #ifdef JLMDEBUG1 1663 if ( mbi->flags & MBI_CMDLINE ) { 1664 bprint("copied mbi has a command line\n"); 1665 } 1666 else { 1667 bprint("copied mbi does not have a command line\n"); 1668 } 1669 bprint("original mbi, %d modules, size %d\n", 1670 mbi->mods_count, sizeof(multiboot_info_t)); 1671 #endif 1672 if(linux_command_line!=0) { 1673 mbi->flags|= MBI_CMDLINE; 1674 mbi->cmdline= (uint32_t)linux_command_line; 1675 } 1676 linux_mbi.mods_count--; 1677 linux_mbi.mods_addr+= sizeof(module_t); 1678 linux_mbi.mmap_addr= (uint32_t)g_copy_e820_map; 1679 linux_mbi.mmap_length= g_nr_map*sizeof(memory_map_t); 1680 1681 #ifdef JLMDEBUG1 1682 bprint("linux mbi, %d modules\n", linux_mbi.mods_count); 1683 #endif 1684 1685 if(expand_linux_image(&linux_mbi, linux_start, linux_end-linux_start, 1686 initram_start, initram_end-initram_start, 1687 &linux_entry_address)!=0) { 1688 bprint("cannot expand linux image\n"); 1689 return 1; 1690 } 1691 #ifdef JLMDEBUG1 1692 bprint("linux_real_mode_start, linux_real_mode_size: 0x%08x %d\n", 1693 linux_real_mode_start, linux_real_mode_size); 1694 bprint("linux_protected_mode_start, linux_protected_mode_size: 0x%08x %d\n", 1695 linux_protected_mode_start, linux_protected_mode_size); 1696 bprint("initram_start_address: 0x%08x\n", initram_start_address); 1697 #endif 1698 if(prepare_primary_guest_args(&linux_mbi)!=0) { 1699 bprint("cannot prepare_primary_guest_args\n"); 1700 return 1; 1701 } 1702 // CHECK 1703 linux_start_address= linux_protected_mode_start; 1704 1705 #ifdef JLMDEBUG1 1706 // print header 1707 linux_kernel_header_t* hdr = (linux_kernel_header_t*) 1708 (linux_start + KERNEL_HEADER_OFFSET); 1709 bprint("linux header\n"); 1710 bprint("setup_sects 0x%02x, code32_start 0x%08x\n", 1711 hdr->setup_sects, hdr->code32_start); 1712 bprint("payload offset 0x%08x, payload length 0x%08x\n", 1713 hdr->payload_offset, hdr->payload_length); 1714 bprint("heap_end_ptr 0x%08x, command line: 0x%08x\n", 1715 hdr->heap_end_ptr, hdr->cmd_line_ptr); 1716 bprint("ramdisk image 0x%08x, ramdisk size %d\n", 1717 hdr->ramdisk_image, hdr->ramdisk_size); 1718 #endif 1719 return 0; 1720 } 1721 1722 1723 #define MIN_ANONYMOUS_GUEST_ID 30000 1724 #define GUEST_IS_PRIMARY_FLAG 1 1725 #define GUEST_IS_NMI_OWNER_FLAG 2 1726 #define GUEST_IS_ACPI_OWNER_FLAG 4 1727 #define GUEST_IS_DEFAULT_DEVICE_OWNER_FLAG 8 1728 #define GUEST_BIOS_ACCESS_ENABLED_FLAG 16 1729 #define GUEST_SAVED_IMAGE_IS_COMPRESSED_FLAG 32 1730 1731 1732 int prepare_primary_guest_environment(const multiboot_info_t *mbi) 1733 { 1734 // setup stack ,control and gp registers for VMCS to init guest 1735 // Guest wakes up in 32 bit protected mode with arguments in edi 1736 linux_setup(); 1737 1738 // Guest state initialization for relocated image 1739 evmm_g0.size_of_this_struct = sizeof(VMM_GUEST_STARTUP); 1740 evmm_g0.version_of_this_struct = VMM_GUEST_STARTUP_VERSION; 1741 evmm_g0.flags = 0; 1742 BITMAP_SET(evmm_g0.flags, VMM_GUEST_FLAG_LAUNCH_IMMEDIATELY); 1743 evmm_g0.flags= GUEST_IS_PRIMARY_FLAG | GUEST_IS_DEFAULT_DEVICE_OWNER_FLAG; 1744 evmm_g0.guest_magic_number = MIN_ANONYMOUS_GUEST_ID; 1745 evmm_g0.cpu_affinity = -1; 1746 #if 0 1747 evmm_g0.cpu_states_count = 1+evmm_num_of_aps; 1748 #else 1749 evmm_g0.cpu_states_count = 1; 1750 #endif 1751 evmm_g0.devices_count = 0; 1752 evmm_g0.image_size = 0; // linux_end - linux_start; 1753 evmm_g0.image_address= linux_start_address; 1754 evmm_g0.image_offset_in_guest_physical_memory = linux_start_address; 1755 evmm_g0.physical_memory_size = 0; 1756 1757 // linux state was prepared by linux_setup 1758 evmm_g0.cpu_states_array = (uint32_t)guest_processor_state; 1759 1760 // This pointer makes sense only if the devices_count > 0 1761 evmm_g0.devices_array = 0; 1762 1763 // Startup struct initialization 1764 p_startup_struct->version_of_this_struct = VMM_STARTUP_STRUCT_VERSION; 1765 p_startup_struct->size_of_this_struct = sizeof(VMM_STARTUP_STRUCT); 1766 p_startup_struct->number_of_processors_at_install_time = 1+evmm_num_of_aps; 1767 p_startup_struct->number_of_processors_at_boot_time = 1+evmm_num_of_aps; 1768 p_startup_struct->number_of_secondary_guests = 0; 1769 p_startup_struct->size_of_vmm_stack = UVMM_DEFAULT_STACK_SIZE_PAGES; 1770 p_startup_struct->unsupported_vendor_id = 0; 1771 p_startup_struct->unsupported_device_id = 0; 1772 p_startup_struct->flags = 0; 1773 1774 p_startup_struct->default_device_owner= PRIMARY_MAGIC; 1775 p_startup_struct->acpi_owner= PRIMARY_MAGIC; 1776 p_startup_struct->nmi_owner= PRIMARY_MAGIC; 1777 p_startup_struct->primary_guest_startup_state = 1778 (uint64_t)(uint32_t)&evmm_g0; 1779 1780 // excluded regions 1781 p_startup_struct->num_excluded_regions= 4; 1782 // remove evmm from ept 1783 // tmroeder: switched from evmm_image_size to evmm_mem_size 1784 (p_startup_struct->vmm_memory_layout[0]).image_size = 1785 evmm_mem_size; 1786 (p_startup_struct->vmm_memory_layout[0]).total_size = 1787 evmm_total_size; 1788 (p_startup_struct->vmm_memory_layout[0]).base_address = 1789 evmm_start_address; 1790 (p_startup_struct->vmm_memory_layout[0]).entry_point = 1791 vmm_main_entry_point; 1792 // remove evmm initial data from ept 1793 (p_startup_struct->vmm_memory_layout[1]).image_size = 1794 evmm_heap_size; 1795 (p_startup_struct->vmm_memory_layout[1]).total_size = 1796 evmm_heap_size; 1797 (p_startup_struct->vmm_memory_layout[1]).base_address = 1798 evmm_heap_base; 1799 (p_startup_struct->vmm_memory_layout[1]).entry_point = 1800 evmm_heap_base; 1801 // remove tboot from ept 1802 (p_startup_struct->vmm_memory_layout[2]).image_size = 1803 tboot_end-tboot_start; 1804 (p_startup_struct->vmm_memory_layout[2]).total_size = 1805 tboot_end-tboot_start; 1806 (p_startup_struct->vmm_memory_layout[2]).base_address = 1807 tboot_start; 1808 (p_startup_struct->vmm_memory_layout[2]).entry_point = 1809 tboot_start; 1810 // remove bootstrap from ept 1811 (p_startup_struct->vmm_memory_layout[3]).image_size = 1812 bootstrap_end-bootstrap_start; 1813 (p_startup_struct->vmm_memory_layout[3]).total_size = 1814 bootstrap_end-bootstrap_start; 1815 (p_startup_struct->vmm_memory_layout[3]).base_address = 1816 bootstrap_start; 1817 (p_startup_struct->vmm_memory_layout[3]).entry_point = 1818 bootstrap_start; 1819 1820 // set up evmm e820 table 1821 p_startup_struct->physical_memory_layout_E820 = evmm_get_e820_table(mbi); 1822 if (p_startup_struct->physical_memory_layout_E820 == (uint32_t)-1) { 1823 bprint("Error getting e820 table\n"); 1824 return 1; 1825 } 1826 1827 // application parameters 1828 // This structure is not used so the setting is probably OK. 1829 evmm_a0.size_of_this_struct = sizeof(VMM_APPLICATION_PARAMS_STRUCT); 1830 evmm_a0.number_of_params = 0; 1831 evmm_a0.session_id = 0; 1832 evmm_a0.address_entry_list = 0; 1833 evmm_a0.entry_number = 0; 1834 #if 0 1835 evmm_a0.fadt_gpa = NULL; 1836 evmm_a0.dmar_gpa = NULL; 1837 #endif 1838 1839 return 0; 1840 } 1841 1842 1843 // ------------------------------------------------------------------------- 1844 1845 1846 // Functions for relocating images 1847 1848 1849 elf64_phdr* get_program_load_header(uint32_t image) 1850 { 1851 elf64_hdr* hdr = (elf64_hdr*) image; 1852 elf64_phdr* prog_header= NULL; 1853 int i; 1854 1855 #ifdef JLMDEBUG1 1856 bprint("get_program_load_header: %d segments, entry size is %d, offset: 0x%08x\n", 1857 (int)hdr->e_phnum, (uint32_t)hdr->e_phentsize, (uint32_t)hdr->e_phoff); 1858 #endif 1859 for(i=0; i<(int)hdr->e_phnum;i++) { 1860 prog_header= (elf64_phdr*)(image+(uint32_t)hdr->e_phoff+i*((uint32_t)hdr->e_phentsize)); 1861 #ifdef JLMDEBUG1 1862 bprint("segment entry: %d 0x%08x, offset: 0x%08x\n", 1863 (int)prog_header->p_type, (uint32_t)prog_header->p_vaddr, 1864 (uint32_t)prog_header->p_offset); 1865 #endif 1866 if(prog_header->p_type==ELF64_PT_LOAD) { 1867 return prog_header; 1868 } 1869 } 1870 return NULL; 1871 } 1872 1873 1874 #define EM_X86_64 62 1875 uint64_t OriginalEntryAddress(uint32_t base) 1876 { 1877 elf64_hdr* elf= (elf64_hdr*) base; 1878 if(elf->e_machine!=EM_X86_64) 1879 return 0ULL; 1880 return elf->e_entry; 1881 } 1882 1883 1884 // get rid of hole from tboot 1885 // from 0x4005000 to 0xb83f3000 1886 /*uint32_t size; 1887 uint32_t base_addr_low; 1888 uint32_t base_addr_high; 1889 uint32_t length_low; 1890 uint32_t length_high; 1891 uint32_t type; 1892 */ 1893 void fixe820map() 1894 { 1895 unsigned int i; 1896 memory_map_t *entry; 1897 1898 for(i=0; i<g_nr_map; i++) { 1899 entry= &bootstrap_e820[i]; 1900 if(entry->base_addr_low<0xa0000000 && 1901 (entry->base_addr_low+entry->size)>0xa0000000 && 1902 entry->base_addr_high==0) { 1903 entry->type= E820_RAM; 1904 break; 1905 } 1906 } 1907 } 1908 1909 1910 // ------------------------------------------------------------------------- 1911 1912 1913 // main 1914 // tboot jumps in here 1915 int start32_evmm(uint32_t magic, multiboot_info_t* mbi, uint32_t initial_entry) 1916 { 1917 int i; 1918 module_t* m; 1919 1920 // reinitialize screen printing 1921 bootstrap_partial_reset(); 1922 #ifdef JLMDEBUG 1923 bprint("start32_evmm, mbi: %08x, initial_entry: %08x, magic: %08x\n", 1924 (uint32_t)mbi, initial_entry, magic); 1925 #endif 1926 1927 // print out the registers 1928 // set mbi to 0x10000 1929 bprint("shared_page = %p\n", shared_page); 1930 mbi = (multiboot_info_t*)0x10000; 1931 // tboot start/end, this is the only data from the shared 1932 // page we use 1933 #ifdef TBOOT_SHARED_PAGE 1934 tboot_start= shared_page->tboot_base; 1935 tboot_end= shared_page->tboot_base+shared_page->tboot_size; 1936 #else 1937 tboot_start= 0; 1938 tboot_end= 0; 1939 #endif 1940 1941 // bootstrap's start (load) address and its end address 1942 bootstrap_start= (uint32_t)&_start_bootstrap; 1943 bootstrap_end= (uint32_t)&_end_bootstrap; 1944 1945 // We assume the standard with three modules after bootstrap: 1946 // 64-bit evmm, the linux image and initram fs. 1947 // Everything is decompressed EXCEPT the protected mode portion of linux 1948 int l= mbi->mods_count; 1949 if (l!=3) { 1950 bprint("bootstrap error: wrong number of modules %d\n", l); 1951 LOOP_FOREVER 1952 } 1953 1954 m= get_module(mbi, 0); 1955 evmm_start= (uint32_t)m->mod_start; 1956 evmm_end= (uint32_t)m->mod_end; 1957 evmm_command_line= (char*)m->string; 1958 1959 m= get_module(mbi, 1); 1960 linux_start= (uint32_t)m->mod_start; 1961 linux_end= (uint32_t)m->mod_end; 1962 linux_command_line= (char*)m->string; 1963 1964 if(l>2) { 1965 m= get_module(mbi, 2); 1966 initram_start= (uint32_t)m->mod_start; 1967 initram_end= (uint32_t)m->mod_end; 1968 } 1969 1970 #ifdef JLMDEBUG1 1971 // shared page 1972 bprint("tboot_start, tboot_end: 0x%08x 0x%08x\n", tboot_start, tboot_end); 1973 bprint("bootstrap_start, bootstrap_end: 0x%08x 0x%08x, size: %d\n", 1974 bootstrap_start, bootstrap_end, bootstrap_end-bootstrap_start); 1975 bprint("evmm_start, evmm_end: 0x%08x 0x%08x\n", evmm_start, evmm_end); 1976 if(evmm_command_line==0) 1977 bprint("evmm command line is NULL\n"); 1978 else 1979 bprint("evmm command line: %s\n", evmm_command_line); 1980 bprint("linux_start, linux_end: 0x%08x 0x%08x\n", linux_start, linux_end); 1981 if(linux_command_line==0) 1982 bprint("linux command line is NULL\n"); 1983 else 1984 bprint("linux command line: %s\n", linux_command_line); 1985 bprint("initram_start, initram_end: 0x%08x 0x%08x\n", initram_start, initram_end); 1986 #endif 1987 1988 // initialize stack array 1989 for(i=0;i<MAXPROCESSORS;i++) 1990 evmm_stack_pointers_array[i]= 0; 1991 1992 // get CPU info 1993 uint32_t info; 1994 __asm__ volatile ( 1995 "\tmovl $1, %%eax\n" 1996 "\tcpuid\n" 1997 "\tmovl %%ebx, %[info]\n" 1998 : [info] "=m" (info) 1999 : : "%eax", "%ebx", "%ecx", "%edx"); 2000 // NOTE: changed shift from 16 to 18 to get the right answer 2001 evmm_num_of_aps = ((info>>18)&0xff)-1; 2002 if(evmm_num_of_aps < 0) 2003 evmm_num_of_aps = 0; 2004 if(evmm_num_of_aps>=MAXPROCESSORS) { 2005 bprint("Too many aps (%d), resetting to %d\n", evmm_num_of_aps, MAXPROCESSORS); 2006 evmm_num_of_aps = MAXPROCESSORS-1; 2007 } 2008 #ifndef MULTIAPS_ENABLED 2009 evmm_num_of_aps = 0; 2010 #endif 2011 2012 // get tboot gdtr 2013 __asm__ volatile ( 2014 "\tsgdt %[tboot_gdtr_32]\n" 2015 :[tboot_gdtr_32] "=m" (tboot_gdtr_32) 2016 ::); 2017 2018 // make a fake TSS to keep vmcs happy 2019 *((UINT64*)tboot_gdtr_32.base+24)= 0x00808b0000000000ULL; 2020 *((UINT64*)tboot_gdtr_32.base+32)= 0ULL; 2021 tboot_gdtr_32.limit= 39; 2022 tboot_tr_attr= 0x0000808b; 2023 tboot_tr_limit= 0xffff; 2024 ia32_write_ts(24); 2025 2026 tboot_cs_selector= ia32_read_cs(); 2027 tboot_ds_selector= ia32_read_ds(); 2028 tboot_ss_selector= ia32_read_ss(); 2029 tboot_tr_selector= ia32_read_ts(); 2030 ia32_read_msr(IA32_MSR_DEBUGCTL, &tboot_msr_debugctl); 2031 ia32_read_msr(IA32_MSR_EFER, &tboot_msr_efer); 2032 ia32_read_msr(IA32_MSR_PAT, &tboot_msr_pat); 2033 ia32_read_msr(IA32_MSR_SYSENTER_ESP, &tboot_msr_sysenter_esp); 2034 ia32_read_msr(IA32_MSR_SYSENTER_EIP, &tboot_msr_sysenter_eip); 2035 ia32_read_msr(IA32_MSR_SYSENTER_CS, &tboot_msr_sysenter_cs); 2036 read_cr0(&tboot_cr0); 2037 read_cr2(&tboot_cr2); 2038 read_cr3(&tboot_cr3); 2039 read_cr4(&tboot_cr4); 2040 2041 #ifdef JLMDEBUG1 2042 bprint("original gdt\n"); 2043 HexDump((uint8_t*) tboot_gdtr_32.base, 2044 (uint8_t*) tboot_gdtr_32.base+tboot_gdtr_32.limit); 2045 #endif 2046 2047 uint32_t* p; 2048 p= (uint32_t*)(tboot_gdtr_32.base+tboot_cs_selector); 2049 ia32_get_selector(p, &tboot_cs_base, &tboot_cs_limit, &tboot_cs_attr); 2050 p= (uint32_t*)(tboot_gdtr_32.base+tboot_ds_selector); 2051 ia32_get_selector(p, &tboot_ds_base, &tboot_ds_limit, &tboot_ds_attr); 2052 p= (uint32_t*)(tboot_gdtr_32.base+tboot_ss_selector); 2053 ia32_get_selector(p, &tboot_ss_base, &tboot_ss_limit, &tboot_ss_attr); 2054 p= (uint32_t*)(tboot_gdtr_32.base+tboot_tr_selector); 2055 ia32_get_selector(p, &tboot_tr_base, &tboot_tr_limit, &tboot_tr_attr); 2056 2057 #ifdef JLMDEBUG1 2058 bprint("gdt base: %08x, limit: %04x\n", tboot_gdtr_32.base, tboot_gdtr_32.limit); 2059 bprint("tboot cs selector: %04x, base: %08x, limit: %04x, attr: %04x\n", 2060 tboot_cs_selector, tboot_cs_base, tboot_cs_limit, tboot_cs_attr); 2061 bprint("tboot ds selector: %04x, base: %08x, limit: %04x, attr: %04x\n", 2062 tboot_ds_selector, tboot_ds_base, tboot_ds_limit, tboot_ds_attr); 2063 bprint("tboot ss selector: %04x, base: %08x, limit: %04x, attr: %04x\n", 2064 tboot_ss_selector, tboot_ss_base, tboot_ss_limit, tboot_ss_attr); 2065 bprint("tboot tr selector: %04x, base: %08x, limit: %04x, attr: %04x\n", 2066 tboot_tr_selector, tboot_tr_base, tboot_tr_limit, tboot_tr_attr); 2067 HexDump((uint8_t*) tboot_gdtr_32.base, 2068 (uint8_t*) tboot_gdtr_32.base+tboot_gdtr_32.limit); 2069 // LOOP_FOREVER 2070 bprint("msr_debugctl: 0x%016llx\n", tboot_msr_debugctl); 2071 bprint("msr_efer: 0x%016llx\n", tboot_msr_efer); 2072 bprint("msr_pat: 0x%016llx\n", tboot_msr_pat); 2073 bprint("msr_sysenter_esp: 0x%016llx\n", tboot_msr_sysenter_esp); 2074 bprint("msr_sysenter_eip: 0x%016llx\n", tboot_msr_sysenter_eip); 2075 bprint("msr_sysenter_cs: 0x%016llx\n", tboot_msr_sysenter_cs); 2076 #endif 2077 2078 init32.i32_low_memory_page = low_mem; 2079 init32.i32_num_of_aps = evmm_num_of_aps; 2080 2081 // set up evmm heap addresses and range 2082 setup_evmm_heap(EVMM_HEAP_BASE, EVMM_HEAP_SIZE); 2083 2084 // Relocate evmm_image 2085 evmm_start_address= EVMM_DEFAULT_START_ADDR; 2086 elf64_phdr* prog_header= get_program_load_header(evmm_start); 2087 if(prog_header==NULL) { 2088 bprint("Cant find load program header\n"); 2089 LOOP_FOREVER 2090 } 2091 2092 uint32_t evmm_start_load_segment= 0; 2093 evmm_start_load_segment= evmm_start+((uint32_t)prog_header->p_offset); 2094 evmm_image_size= (uint32_t) prog_header->p_filesz; 2095 evmm_orig_mem_size= (uint32_t) prog_header->p_memsz; 2096 evmm_mem_size= PAGE_UP(evmm_orig_mem_size); // full page 2097 evmm_total_size= evmm_mem_size+UVMM_DEFAULT_HEAP; 2098 2099 if(((uint32_t)(prog_header->p_vaddr))!=evmm_start_address) { 2100 bprint("evmm load address is not default: 0x%08x, actual: 0x%08x\n", 2101 evmm_start_address, evmm_start_load_segment); 2102 LOOP_FOREVER 2103 } 2104 2105 vmm_memcpy((void *)evmm_start_address, 2106 (const void*) evmm_start_load_segment, 2107 evmm_image_size); 2108 // zero everything after image 2109 vmm_memset((void *)(evmm_start_address+evmm_image_size), 2110 0, evmm_total_size-evmm_image_size); 2111 2112 // Get entry point 2113 vmm_main_entry_point = (uint32_t)OriginalEntryAddress(evmm_start); 2114 if(vmm_main_entry_point==0) { 2115 bprint("OriginalEntryAddress: bad elf format\n"); 2116 LOOP_FOREVER 2117 } 2118 2119 #ifdef JLMDEBUG1 2120 bprint("\tevmm_heap_base evmm_heap_size: 0x%08x 0x%08x\n", 2121 evmm_heap_base, evmm_heap_size); 2122 bprint("\trelocated evmm_start_address: 0x%08x\nvmm_main_entry_point: 0x%08x\n", 2123 evmm_start_address, vmm_main_entry_point); 2124 bprint("\tprogram header load address: 0x%08x, image size: 0x%08x, total size: 0x%08x\n", 2125 (uint32_t)(prog_header->p_vaddr), evmm_image_size, evmm_total_size); 2126 #endif 2127 2128 // setup our e820 table (20 byte format) 2129 max_e820_entries= E820MAX; // copied e820 globals 2130 g_nr_map= 0; 2131 g_copy_e820_map= bootstrap_e820; 2132 if(copy_e820_map(mbi)!=true) { 2133 bprint("cant copy e820 map\n"); 2134 LOOP_FOREVER 2135 } 2136 2137 #ifdef JLMDEBUG1 2138 bprint("%d e820 entries after copy, original had %d\n", 2139 g_nr_map, mbi->mmap_length/sizeof(memory_map_t)); 2140 #endif 2141 2142 // tboot reserves the region were putting stuff in (see below) 2143 // Tboot's explaination 2144 // Tboot reserves the following regions: 0x20200000 - 0x40004000, 2145 // 0x40005000 - 0xb88f3000, 0xb9bff000 - 0xb9c00000 2146 // because some legacy bios's put USB buffers there 2147 // which causes problems if they are DMA protected. 2148 // we're going to ignore this because we want to 2149 // put bootstrap and evmm here. 2150 // What BIOS's have this problem? 2151 // Answer: 2152 // Both igfx & legacy USB support might reserve small chunks of memory 2153 // (0x20000000~0x20200000 is for igfx, 0x40004000~0x40005000 is for usb). 2154 // Even in latest Lenovo laptop these memory holes is still existing. 2155 // Tboot added a min_ram=0xXXXXXXXX option to get the usable memory 2156 // back if the size exceeds the number in this option, but the side 2157 // effect is the device memory before the reenabled usabled ram will 2158 // be DMA protected. 2159 fixe820map(); 2160 2161 // reserve bootstrap 2162 if(!e820_reserve_ram(bootstrap_start, (bootstrap_end - bootstrap_start))) { 2163 bprint("Unable to reserve bootstrap region in e820 table\n"); 2164 LOOP_FOREVER 2165 } 2166 2167 // JLM(FIX):the start and end should be at page boundries 2168 // reserve evmm area 2169 // need to include additional heap too evmm_total_size 2170 if (!e820_reserve_ram(evmm_heap_base, (evmm_heap_base+evmm_heap_size+evmm_total_size))) { 2171 bprint("Unable to reserve evmm region in e820 table\n"); 2172 LOOP_FOREVER 2173 } 2174 2175 #ifdef JLMDEBUG1 2176 bprint("start of evmm exclusion: 0x%08x, end of exclusion: 0x%08lx\n", 2177 evmm_heap_base, evmm_heap_base+evmm_heap_size+evmm_total_size); 2178 bprint("%d e820 entries after new reservations\n", g_nr_map); 2179 print_map(&g_copy_e820_map[5], 10); 2180 #endif 2181 2182 #ifdef SETUPIDT 2183 extern void SetupIDT(); // this is not needed 2184 SetupIDT(); 2185 #endif 2186 2187 // setup gdt for 64-bit on BSP 2188 if(setup_64bit()!=0) { 2189 bprint("Unable to setup 64 bit paging\n"); 2190 LOOP_FOREVER 2191 } 2192 2193 // Allocate stack and set rsp (esp) 2194 if(setup_evmm_stacks()!=0) { 2195 bprint("can't allocate stack\n"); 2196 LOOP_FOREVER 2197 } 2198 2199 #ifdef JLMDEBUG1 2200 bprint("evmm_bsp_stack: 0x%08x\n", evmm_bsp_stack); 2201 bprint("%d processors, stacks:\n", 1+evmm_num_of_aps); 2202 for(i=0; i<=evmm_num_of_aps; i++) { 2203 bprint("%08x ", evmm_stack_pointers_array[i]); 2204 } 2205 bprint("\n"); 2206 #endif 2207 2208 // this is used by the wakup code in sipi init 2209 // CHECK(JLM): maybe this should be &evmm_stack_pointers_array[1] 2210 init32.i32_esp= evmm_stack_pointers_array; 2211 2212 // We need to allocate this before guest setup 2213 if(allocate_linux_data()!=0) { 2214 bprint("Cant allocate data area for primary linux guest\n"); 2215 LOOP_FOREVER 2216 } 2217 2218 // mark linux data area as reserved 2219 #ifdef LOWBOOTPARAMS 2220 if(!e820_reserve_ram(linux_stack_base, 2221 evmm_heap_base-linux_stack_base)) { 2222 bprint("Unable to reserve bootstrap region in e820 table\n"); 2223 LOOP_FOREVER 2224 } 2225 if(!e820_reserve_ram(linux_boot_parameters, 2*PAGE_SIZE)) { 2226 bprint("Unable to reserve bootstrap region in e820 table\n"); 2227 LOOP_FOREVER 2228 } 2229 #else 2230 if(!e820_reserve_ram(linux_boot_parameters, 2231 evmm_heap_base-linux_boot_parameters)) { 2232 bprint("Unable to reserve bootstrap region in e820 table\n"); 2233 LOOP_FOREVER 2234 } 2235 #endif 2236 2237 // prepare linux for evmm 2238 if(prepare_linux_image_for_evmm(mbi)) { 2239 bprint("Cant prepare linux image\n"); 2240 LOOP_FOREVER 2241 } 2242 2243 // copy linux data that is passed to linux in call 2244 if(prepare_primary_guest_environment(&linux_mbi)!=0) { 2245 bprint("Error setting up evmm startup arguments\n"); 2246 LOOP_FOREVER 2247 } 2248 2249 #ifdef JLMDEBUG1 2250 // Print final parameters for linux 2251 boot_params_t* new_boot_params= (boot_params_t*)linux_boot_parameters; 2252 bprint("Final Linux parameters\n"); 2253 bprint("\tShared page address: 0x%016lx %d e820 entries\n", 2254 (long unsigned int)*(uint64_t*)new_boot_params->tboot_shared_addr, 2255 new_boot_params->e820_entries); 2256 bprint("\tCode32_start: 0x%08x, ramdisk: 0x%08x, ramdisk size: %d\n", 2257 new_boot_params->hdr.code32_start, 2258 new_boot_params->hdr.ramdisk_image, 2259 new_boot_params->hdr.ramdisk_size); 2260 char* s= (char*) new_boot_params->hdr.cmd_line_ptr; 2261 if(s!=NULL || vmm_strlen(s)<100) { 2262 bprint("\tcommand line: %s\n", s); 2263 } 2264 else { 2265 bprint("\tinvalid command line\n"); 2266 } 2267 #endif 2268 #ifdef JLMDEBUG1 2269 bprint("code at evmm start\n"); 2270 HexDump((uint8_t*)evmm_start_address, (uint8_t*)evmm_start_address+10); 2271 HexDump((uint8_t*)linux_start_address, (uint8_t*)linux_start_address+10); 2272 #endif 2273 2274 // FIX (JLM): In evmm, exclude tboot and bootstrap areas from primary space 2275 // CHECK (JLM): check that everything is measured (bootstrap, evmm) 2276 2277 // set up evmm stack for vmm_main call and flip tp 64 bit mode 2278 // vmm_main call: 2279 // vmm_main(uint32_t local_apic_id, uint64_t startup_struct_u, 2280 // uint64_t application_params_struct_u, 2281 // uint64_t reserved UNUSED) 2282 2283 #ifdef JLMDEBUG1 2284 bprint("evmm_image_size: 0x%016lx, evmm_mem_size: 0x%016lx\n", 2285 (long unsigned int)evmm_image_size, (long unsigned int)evmm_mem_size); 2286 bprint("Orig extra mem: 0x%016lx, diff: 0x%016lx\n", 2287 (long unsigned int)evmm_orig_mem_size, 2288 (long unsigned int)(evmm_orig_mem_size-evmm_image_size)); 2289 bprint("evmm_total_size: 0x%016lx\n", 2290 (long unsigned int)evmm_total_size); 2291 bprint("Evmm descriptor table, cs selector: 0x%08x, cr3: 0x%08x\n", 2292 (uint32_t) evmm64_cs_selector, (uint32_t) evmm64_cr3); 2293 HexDump((uint8_t*)gdtr_64.base, 2294 (uint8_t*)gdtr_64.base+gdtr_64.limit); 2295 bprint("Tboot descriptors, cs_sel: %04x, ds_sel: %04x, ss_sel: %04x, tr_sel: %04x:\n", 2296 tboot_cs_selector,tboot_ds_selector, 2297 tboot_ss_selector, tboot_tr_selector); 2298 bprint("Tboot attrs, cs_attr: %08x, ds_attr: %08x, ss_attr: %08x, tr_attr: %08x:\n", 2299 tboot_cs_attr,tboot_ds_attr, 2300 tboot_ss_attr, tboot_tr_attr); 2301 bprint("Tboot limits, cs_limit: %04x, ds_limit: %04x, ss_limit: %04x, tr_limit: %04x:\n", 2302 tboot_cs_limit,tboot_ds_limit, tboot_ss_limit, tboot_tr_limit); 2303 HexDump((uint8_t*)(tboot_gdtr_32.base), 2304 (uint8_t*)(tboot_gdtr_32.base+tboot_gdtr_32.limit)); 2305 bprint("cr0: %08x, cr2: %08x, cr3: %08x, cr4: %08x\n", guest_cr0, 2306 guest_cr2, guest_cr3, guest_cr4); 2307 bprint("Guest descriptor table, cs_sel: %04x, ds_sel: %04x, ss_sel: %04x, tr_sel: %04x:\n", 2308 guest_cs_selector, guest_ds_selector, 2309 guest_ss_selector, guest_tr_selector); 2310 HexDump((uint8_t*)(guest_gdtr.base), 2311 (uint8_t*)(guest_gdtr.base+guest_gdtr.limit)); 2312 bprint("arguments to vmm_main: "); 2313 bprint("apic %d, p_startup_struct, 0x%08x\n", 2314 (int) local_apic_id, (int) p_startup_struct); 2315 bprint("linux stack base: %08x, linux_edi_reg: %08x\n", 2316 linux_stack_base, linux_edi_register); 2317 bprint("\tapplication struct 0x%08x, reserved, 0x%08x\n", 2318 (int)evmm_p_a0, (int)evmm_reserved); 2319 #endif 2320 #ifdef JLMDEBUG1 2321 boot_params_t* linux_boot_params= (boot_params_t*)linux_boot_parameters; 2322 bprint("bootparams at: %p, e820 map in boot params to linux (%d entries)\n", 2323 linux_boot_params, linux_boot_params->e820_entries); 2324 bprint("command line: %s\n", 2325 (char*) (linux_boot_parameters+sizeof(boot_params_t))); 2326 e820entry_t* pent= linux_boot_params->e820_map; 2327 for(i=0; i<linux_boot_params->e820_entries;i++) { 2328 bprint("\taddr: %016llx, size: %lld, type: %d\n", 2329 pent->addr, pent->size, pent->type); 2330 pent++; 2331 } 2332 #endif 2333 2334 if(evmm64_cs_selector!=64) { 2335 bprint("cs_selector is wrong, %0x\n", evmm64_cs_selector); 2336 LOOP_FOREVER 2337 } 2338 2339 if (evmm_num_of_aps > 0) { 2340 #ifdef JLMDEBUG 2341 bprint("about to call startap_main, %d aps\n", evmm_num_of_aps); 2342 #endif 2343 startap_main(&init32, &init64, p_startup_struct, vmm_main_entry_point); 2344 } 2345 2346 __asm__ volatile ( 2347 2348 "\tcli\n" 2349 2350 // move entry point to ebx for jump 2351 "\tmovl %[vmm_main_entry_point], %%ebx\n" 2352 2353 // initialize CR3 with PML4 base 2354 "\tmovl %[evmm64_cr3], %%eax\n" 2355 "\tmovl %%eax, %%cr3 \n" 2356 2357 // evmm_initial_stack points to the start of the stack 2358 "\tmovl %[evmm_bsp_stack], %%esp\n" 2359 "\tandl $0xfffffff8, %%esp\n" 2360 2361 // prepare arguments for 64-bit mode 2362 // there are 4 arguments (including reserved) 2363 "\txor %%eax, %%eax\n" 2364 "\tpush %%eax\n" 2365 "\tpush %[evmm_reserved]\n" 2366 "\tpush %%eax\n" 2367 "\tpush %[evmm_p_a0]\n" 2368 "\tpush %%eax\n" 2369 "\tpush %[p_startup_struct]\n" 2370 "\tpush %%eax\n" 2371 "\tpush %[local_apic_id]\n" 2372 2373 // enable 64-bit mode 2374 // EFER MSR register 2375 "\tmovl $0x0c0000080, %%ecx\n" 2376 // read EFER into EAX 2377 "\trdmsr\n" 2378 2379 // set EFER.LME=1 2380 "\tbts $8, %%eax\n" 2381 // write EFER 2382 "\twrmsr\n" 2383 2384 // enable paging CR0.PG=1 2385 "\tmovl %%cr0, %%eax\n" 2386 "\tbts $31, %%eax\n" 2387 "\tmovl %%eax, %%cr0\n" 2388 2389 // at this point we are in 32-bit compatibility mode 2390 // LMA=1, CS.L=0, CS.D=1 2391 // jump from 32bit compatibility mode into 64bit mode. 2392 2393 // mode switch 2394 "ljmp $64, $1f\n" 2395 2396 "1:\n" 2397 // in 64 bit this is actually pop rdi (local apic) 2398 "\tpop %%edi\n" 2399 // in 64 bit this is actually pop rsi (startup struct) 2400 "\tpop %%esi\n" 2401 // in 64 bit this is actually pop rdx (application struct) 2402 "\tpop %%edx\n" 2403 // in 64 bit this is actually pop rcx (reserved) 2404 "\tpop %%ecx\n" 2405 2406 "\tjmp *%%ebx\n" 2407 "\tud2\n" 2408 :: [vmm_main_entry_point] "m" (vmm_main_entry_point), 2409 [evmm_bsp_stack] "m" (evmm_bsp_stack), [evmm64_cr3] "m" (evmm64_cr3), 2410 [evmm64_cs_selector] "m" (evmm64_cs_selector), 2411 [evmm_reserved] "m" (evmm_reserved), 2412 [evmm_p_a0] "m" (evmm_p_a0), [p_startup_struct] "m" (p_startup_struct), 2413 [local_apic_id] "m" (local_apic_id) 2414 :); 2415 2416 return 0; 2417 } 2418