github.com/jlmucb/cloudproxy@v0.0.0-20170830161738-b5aa0b619bc4/cpvmm/vmm/bootstrap/bootstrap_ap_procs_init.c (about) 1 /* 2 * Copyright (c) 2013 Intel Corporation 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * Unless required by applicable law or agreed to in writing, software 9 * distributed under the License is distributed on an "AS IS" BASIS, 10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 * See the License for the specific language governing permissions and 12 * limitations under the License. 13 */ 14 15 16 // Perform application processors init 17 #include "bootstrap_types.h" 18 #include "vmm_defs.h" 19 #include "vmm_startup.h" 20 #include "bootstrap_ap_procs_init.h" 21 #include "bootstrap_print.h" 22 #include "common_libc.h" 23 #include "ia32_defs.h" 24 #include "x32_init64.h" 25 #include "em64t_defs.h" 26 27 28 #define JLMDEBUG 29 #ifdef JLMDEBUG 30 extern void HexDump(uint8_t*, uint8_t*); 31 extern uint16_t ia32_read_cs(); 32 extern uint16_t ia32_read_ds(); 33 extern uint16_t ia32_read_ss(); 34 #endif 35 36 #ifndef UNUSEDVAR 37 #define UNUSEDVAR(x) ((void)x) 38 #endif 39 40 41 // AP startup algorithm 42 // Stage 1 43 // BSP: 44 // 1. Copy APStartUpCode + GDT to low memory page 45 // 2. Clear APs counter 46 // 3. Send SIPI to all processors excluding self 47 // 4. Wait timeout 48 // APs on SIPI receive: 49 // 1. Switch to protected mode 50 // 2. lock inc APs counter + remember my AP number 51 // 3. Loop on wait_lock1 until it changes zero 52 // Stage 2 53 // BSP after timeout: 54 // 5. Read number of APs and allocate memory for stacks 55 // 6. Save GDT and IDT in global array 56 // 7. Clear ready_counter count 57 // 8. Set wait_lock1 to 1 58 // 9. Loop on ready_counter until it will be equal to number of APs 59 // APs on wait_1_lock set 60 // 4. Set stack in a right way 61 // 5. Set right GDT and IDT 62 // 6. Enter "C" code 63 // 7. Increment ready_counter 64 // 8. Loop on wait_lock2 until it changes from zero 65 // Stage 3 66 // BSP after ready_counter becomes == APs number 67 // 10. Return to user 68 // PROBLEM: 69 // NMI may crash the system if it comes before AP stack init done 70 71 72 #define IA32_DEBUG_IO_PORT 0x80 73 #define INITIAL_WAIT_FOR_APS_TIMEOUT_IN_MILIS 150000 74 75 void send_init_ipi(void); 76 void send_broadcast_init_sipi(INIT32_STRUCT *p_init32_data); 77 void send_ipi_to_all_excluding_self(uint32_t vector_number, 78 uint32_t delivery_mode); 79 80 extern void ia32_read_msr(uint32_t msr_id, uint64_t *p_value); 81 static uint32_t startap_tsc_ticks_per_msec = 0; 82 83 84 #define MP_BOOTSTRAP_STATE_INIT 0 85 #define MP_BOOTSTRAP_STATE_APS_ENUMERATED 1 86 87 88 static volatile uint32_t mp_bootstrap_state; 89 90 // stage 1 91 static uint32_t g_aps_counter = 0; 92 93 // stage 2 94 static uint8_t gp_GDT[6] = {0}; // xx:xxxx 95 static uint8_t gp_IDT[6] = {0}; // xx:xxxx 96 97 static volatile uint32_t g_ready_counter = 0; 98 static FUNC_CONTINUE_AP_BOOT g_user_func = 0; 99 static void* g_any_data_for_user_func = 0; 100 101 // 1 in i position means CPU[i] exists 102 static uint8_t ap_presence_array[VMM_MAX_CPU_SUPPORTED] = {0}; 103 extern uint32_t evmm_stack_pointers_array[]; 104 105 106 // Layout of low memory page 107 // APStartUpCode <--- low_mem 108 // GdtTable <-- ALIGN16(low_mem+sizeof(APStartupCode))= loc_gdt 109 // GDT Register <-- ALIGN16(loc_gdt+sizeof(gdt))= loc_gdtr 110 111 // Uncomment the following line to deadloop in AP startup 112 // #define BREAK_IN_AP_STARTUP 113 const uint8_t APStartUpCode[] = 114 { 115 #ifdef BREAK_IN_AP_STARTUP 116 0xEB, 0xFE, // jmp $ 117 #endif 118 0x0f, 0x01, 0x16, 0x00, 0x00, // 00: lgdt GDTR 119 0x0F, 0x20, 0xC0, // 05: mov eax,cr0 120 0x0C, 0x33, // 08: or al,1 121 0x0F, 0x22, 0xC0, // 10: mov cr0,eax 122 0x66, 0xEA, // 13: ljmp CS,CONT16 123 0x00, 0x00, 0x00, 0x00, // 15: CONT16 124 0x00, 0x00, // 19: CS_VALUE 125 //CONT16: 126 0xFA, // 21: cli 127 0x66, 0xB8, 0x00, 0x00, // 22: mov ax,DS_VALUE 128 0x66, 0x8E, 0xD8, // 26: mov ds,ax 129 0x66, 0xB8, 0x00, 0x00, // 29: mov ax,ES_VALUE 130 0x66, 0x8E, 0xC0, // 33: mov es,ax 131 0x66, 0xB8, 0x00, 0x00, // 36: mov ax,GS_VALUE 132 0x66, 0x8E, 0xE8, // 40: mov gs,ax 133 0x66, 0xB8, 0x00, 0x00, // 43: mov ax,FS_VALUE 134 0x66, 0x8E, 0xE0, // 47: mov fs,ax 135 0x66, 0xB8, 0x00, 0x00, // 50: mov ax,SS_VALUE 136 0x66, 0x8E, 0xD0, // 54: mov ss,ax 137 0xB8, 0x00, 0x00, 0x00, 0x00, // 57: mov eax,AP_CONTINUE_WAKEUP_CODE 138 0xFF, 0xE0, // 62: jmp eax 139 }; 140 141 #ifdef BREAK_IN_AP_STARTUP 142 #define AP_CODE_START 2 143 #else 144 #define AP_CODE_START 0 145 #endif 146 147 #define GDTR_OFFSET_IN_CODE (3 + AP_CODE_START) 148 #define CONT16_IN_CODE_OFFSET (15 + AP_CODE_START) 149 #define CS_IN_CODE_OFFSET (19 + AP_CODE_START) 150 #define DS_IN_CODE_OFFSET (24 + AP_CODE_START) 151 #define ES_IN_CODE_OFFSET (31 + AP_CODE_START) 152 #define GS_IN_CODE_OFFSET (38 + AP_CODE_START) 153 #define FS_IN_CODE_OFFSET (45 + AP_CODE_START) 154 #define SS_IN_CODE_OFFSET (52 + AP_CODE_START) 155 #define AP_CONTINUE_WAKEUP_CODE_IN_CODE_OFFSET (58 + AP_CODE_START) 156 157 #define CONT16_VALUE_OFFSET (21 + AP_CODE_START) 158 159 160 void ap_continue_wakeup_code(void); 161 void ap_continue_wakeup_code_C(uint32_t local_apic_id); 162 uint8_t bsp_enumerate_aps(void); 163 void ap_initialize_environment(void); 164 void mp_set_bootstrap_state(uint32_t new_state); 165 166 167 uint64_t startap_rdtsc() 168 { 169 uint64_t ret= 0; 170 uint64_t hi= 0; 171 uint64_t lo= 0; 172 173 __asm__ volatile( 174 "\trdtsc\n" 175 "\tmovl %%eax,%[lo]\n" 176 "\tmovl %%edx,%[hi]\n" 177 : [hi] "=m" (hi), [lo] "=m" (lo) 178 : :); 179 ret= (hi<<32ULL)|lo; 180 return ret; 181 } 182 183 184 // location of gdt and gdtr in low memory page 185 uint32_t loc_gdt= 0; 186 uint32_t loc_gdtr= 0; 187 uint32_t loc_idtr= 0; 188 189 190 // Setup AP low memory startup code 191 void setup_low_memory_ap_code(uint32_t temp_low_memory_4K) 192 { 193 uint8_t* code_to_patch = (uint8_t*)temp_low_memory_4K; 194 uint32_t end_page; 195 UINT16 cs_sel= 0; 196 UINT16 ds_sel= 0; 197 UINT16 es_sel= 0; 198 UINT16 gs_sel= 0; 199 UINT16 fs_sel= 0; 200 UINT16 ss_sel= 0; 201 202 #ifdef JLMDEBUG 203 bprint("setup_low_memory\n"); 204 #endif 205 UNUSEDVAR(end_page); 206 207 // zero low memory 208 vmm_memset(code_to_patch, 0, 0x1000); 209 210 // Copy the Startup code to the beginning of the page 211 vmm_memcpy(code_to_patch, (const void*)APStartUpCode, sizeof(APStartUpCode)); 212 213 IA32_GDTR current_gdtr; 214 //IA32_IDTR current_idtr; 215 __asm__ volatile ( 216 "\tsgdt %[current_gdtr]\n" 217 // "\tsidt %[current_idtr]\n" 218 :[current_gdtr] "=m" (current_gdtr)// , 219 // [current_idtr] "=m" (current_idtr) 220 ::); 221 222 loc_gdt= (temp_low_memory_4K+sizeof(APStartUpCode)+15)&(uint32_t)0xfffffff0; 223 loc_gdtr= (loc_gdt+current_gdtr.limit+16)&(uint32_t)0xfffffff0; 224 loc_idtr= (loc_gdtr+6); 225 226 // GDTR in page 227 *(uint16_t*)loc_gdtr= current_gdtr.limit; 228 *(uint32_t*)(loc_gdtr+2)= loc_gdt; 229 // IDTR in page 230 // *(uint16_t*)loc_idtr= current_idtr.limit; 231 // *(uint32_t*)(loc_idtr+2)= current_idtr.base; 232 233 cs_sel= ia32_read_cs(); 234 ds_sel= ia32_read_ds(); 235 ss_sel= ia32_read_ds(); 236 es_sel= ia32_read_ds(); 237 fs_sel= ia32_read_ds(); 238 gs_sel= ia32_read_ds(); 239 240 // Patch the startup code 241 *((UINT16*)(code_to_patch+GDTR_OFFSET_IN_CODE))= (uint16_t) loc_gdtr; 242 *((uint32_t*)(code_to_patch+CONT16_IN_CODE_OFFSET)) = 243 (uint32_t)code_to_patch + CONT16_VALUE_OFFSET; 244 *((UINT16*)(code_to_patch+CS_IN_CODE_OFFSET))= cs_sel; 245 *((UINT16*)(code_to_patch+DS_IN_CODE_OFFSET))= ds_sel; 246 *((UINT16*)(code_to_patch+ES_IN_CODE_OFFSET))= es_sel; 247 *((UINT16*)(code_to_patch+GS_IN_CODE_OFFSET))= gs_sel; 248 *((UINT16*)(code_to_patch+FS_IN_CODE_OFFSET))= fs_sel; 249 *((UINT16*)(code_to_patch+SS_IN_CODE_OFFSET))= ss_sel; 250 *((uint32_t*)(code_to_patch+AP_CONTINUE_WAKEUP_CODE_IN_CODE_OFFSET)) = 251 (uint32_t)(ap_continue_wakeup_code); 252 253 // GDT in page 254 vmm_memcpy((uint8_t*)loc_gdt, (uint8_t*)current_gdtr.base, current_gdtr.limit+1); 255 256 #if 0 257 // this loops after the ljmp location 258 // uint8_t* pnop= code_to_patch+CONT16_IN_CODE_OFFSET+6; 259 // uint8_t* pnop= code_to_patch+57; 260 uint8_t* pnop= code_to_patch+62; 261 *(pnop++)= 0xeb; *(pnop++)= 0xfe; 262 // *(pnop++)= 0x90; *(pnop++)= 0x90; 263 //*(pnop++)= 0x90; *(pnop++)= 0x90; 264 //*(pnop++)= 0x90; *(pnop++)= 0x90; 265 #endif 266 267 #ifdef JLMDEBUG 268 end_page= loc_idtr+6; 269 bprint("code_to_patch: %p, ljmp offset offset: 0x%08x, address: %p\n", 270 code_to_patch, CONT16_VALUE_OFFSET, code_to_patch+CONT16_VALUE_OFFSET); 271 bprint("cs_sel: 0x%04x, ", cs_sel); 272 bprint("ds_sel: 0x%04x, ", ds_sel); 273 bprint("ss_sel: 0x%04x\n", ss_sel); 274 bprint("loc_gdt: 0x%08x, loc_gdtr: 0x%08x, base: 0x%08x, limit: 0x%04x\n", 275 loc_gdt, loc_gdtr, loc_gdt, current_gdtr.limit); 276 HexDump(code_to_patch, (uint8_t*)end_page); 277 // LOOP_FOREVER 278 #endif 279 return; 280 } 281 282 283 // Initial AP setup in protected mode - should never return 284 void ap_continue_wakeup_code_C(uint32_t local_apic_id) 285 { 286 #ifdef JLMDEBUG 287 bprint("ap_continue_wakeup_code_C 0x%08x\n", local_apic_id); 288 #endif 289 // mark that the command was accepted 290 __asm__ volatile ( 291 "\tlock; incl %[g_ready_counter]\n" 292 : [g_ready_counter] "=m" (g_ready_counter) 293 ::); 294 #ifdef JLMDEBUG 295 bprint("calling user function\n"); 296 #endif 297 if(g_user_func==0) { 298 bprint("null user function in ap_continue_wakeup_code_C\n"); 299 LOOP_FOREVER 300 } 301 // user_func now contains address of the function to be called 302 g_user_func(local_apic_id, g_any_data_for_user_func); 303 return; 304 } 305 306 307 __asm__( 308 ".text\n" 309 ".globl ap_continue_wakeup_code\n" 310 ".type ap_continue_wakeup_code,@function\n" 311 "ap_continue_wakeup_code:\n" 312 "\tcli\n" 313 // get the Local APIC ID 314 // IA32_MSR_APIC_BASE= 0x01B 315 "\tmov $0x01B, %ecx\n" 316 "\trdmsr\n" 317 // LOCAL_APIC_BASE_MSR_MASK= $0xfffff000 318 "\tand $0xfffff000, %eax\n" 319 // LOCAL_APIC_IDENTIFICATION_OFFSET= 0x20 320 "\tmov 0x20(%eax), %ecx\n" 321 // LOCAL_APIC_ID_LOW_RESERVED_BITS_COUNT= 24 322 "\tshr $24, %ecx\n" 323 324 // edx <- address of presence array 325 "\tlea ap_presence_array, %edx\n" 326 "\tadd %ecx, %edx\n" 327 // mark current CPU as present 328 "\tmovb $1, (%edx)\n" 329 // last debug place 330 "1:\n" 331 // MP_BOOTSTRAP_STATE_APS_ENUMERATED= 1 332 "\tcmpl $1, mp_bootstrap_state\n" 333 "\tje 2f\n" 334 "\tpause\n" 335 "\tjmp 1b\n" 336 337 // stage 2 - setup the stack, GDT, IDT and jump to "C" 338 "2:\n" 339 340 // find my stack. My stack offset is in the array 341 // ecx contains CPU ID 342 // point edx to right stack 343 "\tleal evmm_stack_pointers_array, %edx\n" 344 "\tmovl %ecx, %eax\n" 345 "\tshl $2, %eax\n" 346 "\taddl %eax, %edx\n" 347 "\tmov (%edx), %esp\n" 348 349 // setup GDT 350 //"\tlgdt gp_GDT\n" 351 352 // setup IDT 353 "\tlidt gp_IDT\n" 354 355 // align stack 356 "\tand $0xfffffff0, %esp\n" 357 358 // enter "C" function 359 // push AP ordered ID 360 "\tmov %ecx, %edi\n" 361 "\tmov %ecx, %esi\n" 362 "\tpush %ecx\n" 363 // should never return 364 "\tcall ap_continue_wakeup_code_C\n" 365 ); 366 367 368 static uint8_t read_port_8(uint32_t port) 369 { 370 uint8_t out; 371 __asm__ volatile ( 372 "\tmovl %[port],%%edx\n" 373 "\txorl %%eax, %%eax\n" 374 "\tin %%dx, %%al\n" 375 "\tmovb %%al, %[out]\n" 376 : [out] "=m" (out) 377 : [port] "m" (port) 378 :"%edx", "%eax"); 379 return out; 380 } 381 382 383 // Stall (busy loop) for a given time, using the platform's speaker port 384 // h/w. Should only be called at initialization, since a guest OS may 385 // change the platform setting. 386 void startap_stall(uint32_t stall_usec) 387 { 388 uint32_t c= 0; 389 for(c= 0; c<stall_usec; c++) 390 read_port_8(IA32_DEBUG_IO_PORT); 391 return; 392 } 393 394 395 // Calibrate the internal variable with number of TSC ticks pers second. 396 // Should only be called at initialization, as it relies on startap_stall() 397 void startap_calibrate_tsc_ticks_per_msec(void) 398 { 399 uint64_t start_tsc, end_tsc; 400 401 start_tsc= startap_rdtsc(); 402 startap_stall(1000); // 1 ms 403 end_tsc= startap_rdtsc(); 404 startap_tsc_ticks_per_msec= (uint32_t)(end_tsc-start_tsc); 405 #ifdef JLMDEBUG 406 bprint("ticks/ms: %d\n", startap_tsc_ticks_per_msec); 407 #endif 408 return; 409 } 410 411 412 // Stall (busy loop) for a given time, using the CPU TSC register. 413 // Note that, depending on the CPU and ASCI modes, the stall accuracy 414 // may be rough. 415 void startap_stall_using_tsc(uint32_t stall_usec) 416 { 417 uint64_t start_tsc, end_tsc; 418 419 // Initialize startap_tsc_ticks_per_msec. Happens at boot time 420 if(startap_tsc_ticks_per_msec == 0) { 421 startap_calibrate_tsc_ticks_per_msec(); 422 } 423 // Calculate the start_tsc and end_tsc 424 start_tsc = startap_rdtsc(); 425 end_tsc= startap_rdtsc()+(((uint64_t)stall_usec/1000)* 426 (uint64_t)startap_tsc_ticks_per_msec); 427 while (start_tsc<end_tsc) { 428 __asm__ volatile ( 429 "\tpause\n" 430 :::); 431 start_tsc = startap_rdtsc(); 432 } 433 return; 434 } 435 436 437 void send_ipi_to_specific_cpu (uint32_t vector_number, 438 uint32_t delivery_mode, uint8_t dst) 439 { 440 IA32_ICR_LOW icr_low; 441 IA32_ICR_LOW icr_low_status; 442 IA32_ICR_HIGH icr_high; 443 UINT64 apic_base = 0; 444 445 #ifdef JLMDEBUG 446 bprint("send_ipi_to_specific_cpu (%d, %d, %d)\n", 447 vector_number, delivery_mode, dst); 448 #endif 449 vmm_memset(&icr_low, 0, sizeof(IA32_ICR_LOW)); 450 vmm_memset(&icr_low_status, 0, sizeof(IA32_ICR_LOW)); 451 vmm_memset(&icr_high, 0, sizeof(IA32_ICR_HIGH)); 452 icr_low.bits.vector= vector_number; 453 icr_low.bits.delivery_mode = delivery_mode; 454 455 // level is set to 1 (except for INIT_DEASSERT) 456 // trigger mode is set to 0 (except for INIT_DEASSERT) 457 icr_low.bits.level = 1; 458 icr_low.bits.trigger_mode = 0; 459 460 // send to specific cpu 461 icr_low.bits.destination_shorthand = LOCAL_APIC_BROADCAST_MODE_SPECIFY_CPU; 462 icr_high.bits.destination = dst; 463 464 // send 465 ia32_read_msr(IA32_MSR_APIC_BASE, &apic_base); 466 apic_base&= LOCAL_APIC_BASE_MSR_MASK; 467 #ifdef JLMDEBUG 468 bprint("about to call do loop base: %llx %x\n", apic_base, LOCAL_APIC_ICR_OFFSET); 469 #endif 470 471 do { 472 *(uint32_t*)&icr_low_status= *(uint32_t*)(uint32_t) 473 (apic_base+LOCAL_APIC_ICR_OFFSET); 474 } while (icr_low_status.bits.delivery_status!=0); 475 476 #ifdef JLMDEBUG 477 bprint("vector: 0x%04x, ", vector_number); 478 bprint("delivery: 0x%04x\n", delivery_mode); 479 bprint("shorthand: 0x%04x, ", LOCAL_APIC_BROADCAST_MODE_SPECIFY_CPU); 480 bprint("pointer hi: 0x%08x, ", *(uint32_t*)&icr_high); 481 bprint("low: 0x%08x\n", *(uint32_t*)&icr_low); 482 #endif 483 *(uint32_t*)(uint32_t)(apic_base+LOCAL_APIC_ICR_OFFSET_HIGH)= 484 *(uint32_t*)&icr_high; 485 *(uint32_t*)(uint32_t)(apic_base+LOCAL_APIC_ICR_OFFSET)= *(uint32_t*)&icr_low; 486 do { 487 startap_stall_using_tsc(10000); 488 *(uint32_t*)&icr_low_status= *(uint32_t*)(uint32_t) 489 (apic_base+LOCAL_APIC_ICR_OFFSET); 490 } while (icr_low_status.bits.delivery_status!=0); 491 #ifdef JLMDEBUG 492 bprint("send_ipi_to_specific_cpu returning\n"); 493 #endif 494 return; 495 } 496 497 // Send INIT IPI - SIPI to all active APs 498 void send_targeted_init_sipi(struct _INIT32_STRUCT *p_init32_data, 499 VMM_STARTUP_STRUCT *p_startup) 500 { 501 int i; 502 503 #ifdef JLMDEBUG 504 bprint("send_targeted_init_sipi, %d procs at boot. apic ids: \n", 505 p_startup->number_of_processors_at_boot_time); 506 for(i=0; i<p_startup->number_of_processors_at_boot_time; i++) 507 bprint(" %d", p_startup->cpu_local_apic_ids[i]); 508 bprint("\n"); 509 #endif 510 for (i = 0; i < p_startup->number_of_processors_at_boot_time - 1; i++) { 511 send_ipi_to_specific_cpu(0, LOCAL_APIC_DELIVERY_MODE_INIT, 512 p_startup->cpu_local_apic_ids[i+1]); 513 } 514 startap_stall_using_tsc(10000); // timeout - 10 milliseconds 515 516 // SIPI message contains address of the code, shifted right to 12 bits 517 // send it twice - according to manual 518 for (i= 0; i<p_startup->number_of_processors_at_boot_time-1; i++) { 519 send_ipi_to_specific_cpu(((uint32_t)p_init32_data->i32_low_memory_page)>>12, 520 LOCAL_APIC_DELIVERY_MODE_SIPI, p_startup->cpu_local_apic_ids[i+1]); 521 } 522 startap_stall_using_tsc(200000); // timeout - 200 miliseconds 523 for (i = 0; i < p_startup->number_of_processors_at_boot_time - 1; i++) { 524 send_ipi_to_specific_cpu(((uint32_t)p_init32_data->i32_low_memory_page)>>12, 525 LOCAL_APIC_DELIVERY_MODE_SIPI, p_startup->cpu_local_apic_ids[i+1]); 526 } 527 startap_stall_using_tsc(200000); // timeout - 200 miliseconds 528 } 529 530 531 // Start all APs in pre-os launch and only active APs in post-os launch and 532 // bring them to protected non-paged mode. 533 // Processors are left in the state were they wait for continuation signal 534 // p_init32_data - contains pointer to the free low memory page to be used 535 // for bootstap. After the return this memory is free 536 // p_startup - local apic ids of active cpus used post-os launch 537 // Return: 538 // number of processors that were init (not including BSP) 539 // or -1 on errors 540 uint32_t ap_procs_startup(struct _INIT32_STRUCT *p_init32_data, 541 VMM_STARTUP_STRUCT *p_startup) 542 { 543 #ifdef JLMDEBUG 544 bprint("ap_procs_startup init32 data: %p, startup: %p\n", p_init32_data, p_startup); 545 #endif 546 if(NULL==p_init32_data || 0 == p_init32_data->i32_low_memory_page) { 547 return (uint32_t)(-1); 548 } 549 550 // Stage 1 551 ap_initialize_environment(); 552 #ifdef JLMDEBUG 553 bprint("back from ap_initialize_environment()\n"); 554 #endif 555 556 // save IDT and GDT 557 __asm__ volatile ( 558 "\tsgdt %[gp_GDT]\n" 559 "\tsidt %[gp_IDT]\n" 560 : [gp_GDT] "=m" (gp_GDT), [gp_IDT] "=m" (gp_IDT) 561 ::); 562 563 // create AP startup code in low memory 564 setup_low_memory_ap_code(p_init32_data->i32_low_memory_page); 565 send_broadcast_init_sipi(p_init32_data); 566 567 // wait for predefined timeout 568 startap_stall_using_tsc(INITIAL_WAIT_FOR_APS_TIMEOUT_IN_MILIS); 569 570 // Stage 2 571 #ifdef JLMDEBUG 572 bprint("About to call bsp_enumerate_aps\n"); 573 #endif 574 g_aps_counter= bsp_enumerate_aps(); 575 #ifdef JLMDEBUG 576 bprint("g_aps_counter: %d\n", g_aps_counter); 577 bprint("gdt limit: %d, gdt base: 0x%08x\n", *(uint16_t*)(&gp_GDT[0]), 578 *(uint32_t*)(&gp_GDT[2])); 579 bprint("idt limit: %d, idt base: 0x%08x\n", *(uint16_t*)(&gp_IDT[0]), 580 *(uint32_t*)(&gp_IDT[2])); 581 bprint("mp_bootstrap_state: %d\n", mp_bootstrap_state); 582 for(int i=0; i<4; i++) { 583 bprint("stack[%d]= 0x%08x, ", i, evmm_stack_pointers_array[i]); 584 } 585 bprint("\n"); 586 #endif 587 #ifdef JLMDEBUG 588 extern int evmm_num_of_aps; 589 bprint("stage 2, num aps: %d, num aps: %d\n", g_aps_counter, evmm_num_of_aps); 590 for(int i=0; i<4; i++) { 591 bprint("presence[%d]= %d, ", i, ap_presence_array[i]); 592 } 593 bprint("\n"); 594 #endif 595 return g_aps_counter; 596 } 597 598 599 typedef struct { 600 void* any_data1; 601 void* any_data2; 602 void* any_data3; 603 UINT64 ep; 604 } APPLICATION_PARAMS_STRUCT; 605 606 607 // Run user specified function on all APs. 608 // If user function returns it should return in the protected 32bit mode. In this 609 // case APs enter the wait state once more. 610 // continue_ap_boot_func - user given function to continue AP boot 611 // any_data - data to be passed to the function 612 void ap_procs_run(FUNC_CONTINUE_AP_BOOT continue_ap_boot_func, void *any_data) 613 { 614 #ifdef JLMDEBUG 615 extern void start_application(uint32_t cpu_id, 616 const APPLICATION_PARAMS_STRUCT *params); 617 bprint("ap_procs_run function: %p, ready counter: %d, aps counter: %d\n", 618 continue_ap_boot_func, g_ready_counter, g_aps_counter); 619 bprint("start_application: %p\n", start_application); 620 #endif 621 g_user_func= continue_ap_boot_func; 622 g_any_data_for_user_func= any_data; 623 624 // signal to APs to pass to the next stage 625 mp_set_bootstrap_state(MP_BOOTSTRAP_STATE_APS_ENUMERATED); 626 627 // wait until all APs accept this 628 while (g_ready_counter<g_aps_counter) { 629 __asm__ volatile ( "\tpause\n" :::); 630 } 631 #ifdef JLMDEBUG 632 bprint("ap_procs_run function returning %d %d\n", 633 g_ready_counter, g_aps_counter); 634 #endif 635 return; 636 } 637 638 639 // Function : bsp_enumerate_aps 640 // Purpose : Walk through ap_presence_array and count discovered APs. 641 // : and modifies array thus it will contain AP IDs, 642 // : and not just 1/0. 643 // Return : Total number of APs, discovered till now. 644 // Notes : Should be called on BSP 645 uint8_t bsp_enumerate_aps(void) 646 { 647 int i; 648 uint8_t ap_num = 0; 649 650 for (i = 1; i<NELEMENTS(ap_presence_array); ++i) { 651 if (0 != ap_presence_array[i]) { 652 ap_presence_array[i] = ++ap_num; 653 } 654 } 655 return ap_num; 656 } 657 658 659 void ap_initialize_environment(void) 660 { 661 mp_bootstrap_state = MP_BOOTSTRAP_STATE_INIT; 662 g_ready_counter = 0; 663 g_user_func = 0; 664 g_any_data_for_user_func = 0; 665 } 666 667 668 void mp_set_bootstrap_state(uint32_t new_state) 669 { 670 #ifdef JLMDEBUG 671 bprint("mp_set_bootstrap_state %d\n", new_state); 672 #endif 673 __asm__ volatile ( 674 "\tpush %%eax\n" 675 "\tmovl %[new_state], %%eax\n" 676 "\tlock; xchgl %%eax, %[mp_bootstrap_state]\n" 677 "\tpopl %%eax\n" 678 : [mp_bootstrap_state] "=m" (mp_bootstrap_state) 679 : [new_state] "g" (new_state) 680 : "%eax"); 681 return; 682 } 683 684 685 void send_init_ipi(void) 686 { 687 send_ipi_to_all_excluding_self(0, LOCAL_APIC_DELIVERY_MODE_INIT); 688 } 689 690 void send_sipi_ipi(void* code_start) 691 { 692 #ifdef JLMDEBUG 693 bprint("send_ipi_to_all_excluding_self\n"); 694 #endif 695 // SIPI message contains address of the code, shifted right to 12 bits 696 send_ipi_to_all_excluding_self(((uint32_t)code_start)>>12, 697 LOCAL_APIC_DELIVERY_MODE_SIPI); 698 } 699 700 701 // Send INIT IPI - SIPI to all APs in broadcast mode 702 void send_broadcast_init_sipi(INIT32_STRUCT *p_init32_data) 703 { 704 #ifdef JLMDEBUG 705 bprint("send_broadcast_init_sipi\n"); 706 #endif 707 send_init_ipi(); 708 startap_stall_using_tsc(10000); // timeout - 10 milliseconds 709 710 #ifdef JLMDEBUG 711 bprint("back from stall\n"); 712 #endif 713 // SIPI message contains address of the code, shifted right to 12 bits 714 // send it twice - according to manual 715 send_sipi_ipi((void *)p_init32_data->i32_low_memory_page); 716 startap_stall_using_tsc(200000); // timeout - 200 milliseconds 717 #ifdef JLMDEBUG 718 bprint("back from first send_sipi_ipi\n"); 719 #endif 720 send_sipi_ipi((void*)p_init32_data->i32_low_memory_page); 721 startap_stall_using_tsc(200000); // timeout - 200 milliseconds 722 #ifdef JLMDEBUG 723 bprint("back from second send_sipi_ipi\n"); 724 #endif 725 } 726 727 728 // send IPI 729 void send_ipi_to_all_excluding_self(uint32_t vector_number, 730 uint32_t delivery_mode) 731 { 732 IA32_ICR_LOW icr_low; 733 IA32_ICR_LOW icr_low_status; 734 IA32_ICR_HIGH icr_high; 735 UINT64 apic_base = 0; 736 737 #ifdef JLMDEBG1 738 bprint("send_ipi_to_all_excluding_self(%d, %d)\n", 739 vector_number, delivery_mode); 740 #endif 741 vmm_memset(&icr_low, 0, sizeof(IA32_ICR_LOW)); 742 vmm_memset(&icr_low_status, 0, sizeof(IA32_ICR_LOW)); 743 vmm_memset(&icr_high, 0, sizeof(IA32_ICR_HIGH)); 744 icr_low.bits.vector= vector_number; 745 icr_low.bits.delivery_mode = delivery_mode; 746 747 // level is set to 1 (except for INIT_DEASSERT, 748 // which is not supported in P3 and P4) 749 // trigger mode is set to 0 (except for INIT_DEASSERT) 750 icr_low.bits.level = 1; 751 icr_low.bits.trigger_mode = 0; 752 753 // broadcast mode - ALL_EXCLUDING_SELF 754 icr_low.bits.destination_shorthand = 755 LOCAL_APIC_BROADCAST_MODE_ALL_EXCLUDING_SELF; 756 757 // send 758 ia32_read_msr(IA32_MSR_APIC_BASE, &apic_base); 759 apic_base &= LOCAL_APIC_BASE_MSR_MASK; 760 do { 761 *(uint32_t*)&icr_low_status= *(uint32_t*)(uint32_t) 762 (apic_base+LOCAL_APIC_ICR_OFFSET); 763 } while (icr_low_status.bits.delivery_status!=0); 764 765 #ifdef JLMDEBUG 766 bprint("vector: 0x%04x, ", vector_number); 767 bprint("mode: 0x%04x, ", delivery_mode); 768 bprint("pointer hi: 0x%08x, ", *(uint32_t*)&icr_high); 769 bprint("low: 0x%08x\n", *(uint32_t*)&icr_low); 770 #endif 771 *(uint32_t*)(uint32_t)(apic_base+LOCAL_APIC_ICR_OFFSET_HIGH)= 772 *(uint32_t*)&icr_high; 773 *(uint32_t*)(uint32_t)(apic_base+LOCAL_APIC_ICR_OFFSET)= 774 *(uint32_t*)&icr_low; 775 776 do { 777 startap_stall_using_tsc(10000); 778 *(uint32_t*)&icr_low_status= *(uint32_t*)(uint32_t) 779 (apic_base+LOCAL_APIC_ICR_OFFSET); 780 } while (icr_low_status.bits.delivery_status!=0); 781 #ifdef JLMDEBUG 782 bprint("returning from send_ipi_to_all_excluding_self\n"); 783 #endif 784 return; 785 } 786