github.com/jlmucb/cloudproxy@v0.0.0-20170830161738-b5aa0b619bc4/cpvmm/vmm/bootstrap/bootstrap_ap_procs_init.c (about)

     1  /*
     2   * Copyright (c) 2013 Intel Corporation
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *     http://www.apache.org/licenses/LICENSE-2.0
     8   * Unless required by applicable law or agreed to in writing, software
     9   * distributed under the License is distributed on an "AS IS" BASIS,
    10   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11   * See the License for the specific language governing permissions and
    12   * limitations under the License.
    13   */
    14  
    15  
    16  // Perform application processors init
    17  #include "bootstrap_types.h"
    18  #include "vmm_defs.h"
    19  #include "vmm_startup.h"
    20  #include "bootstrap_ap_procs_init.h"
    21  #include "bootstrap_print.h"
    22  #include "common_libc.h"
    23  #include "ia32_defs.h"
    24  #include "x32_init64.h"
    25  #include "em64t_defs.h"
    26  
    27  
    28  #define JLMDEBUG
    29  #ifdef JLMDEBUG
    30  extern void HexDump(uint8_t*, uint8_t*);
    31  extern uint16_t ia32_read_cs();
    32  extern uint16_t ia32_read_ds();
    33  extern uint16_t ia32_read_ss();
    34  #endif 
    35  
    36  #ifndef UNUSEDVAR
    37  #define UNUSEDVAR(x) ((void)x)
    38  #endif
    39  
    40  
    41  // AP startup algorithm
    42  //  Stage 1
    43  //   BSP:
    44  //      1. Copy APStartUpCode + GDT to low memory page
    45  //      2. Clear APs counter
    46  //      3. Send SIPI to all processors excluding self
    47  //      4. Wait timeout
    48  //   APs on SIPI receive:
    49  //      1. Switch to protected mode
    50  //      2. lock inc APs counter + remember my AP number
    51  //      3. Loop on wait_lock1 until it changes from zero
    52  // Stage 2
    53  //   BSP after timeout:
    54  //      5. Read number of APs and allocate memory for stacks
    55  //      6. Save GDT and IDT in global array
    56  //      7. Clear ready_counter count
    57  //      8. Set wait_lock1 to 1
    58  //      9. Loop on ready_counter until it will be equal to number of APs
    59  //   APs once wait_lock1 is set
    60  //      4. Set stack in a right way
    61  //      5. Set right GDT and IDT
    62  //      6. Enter "C" code
    63  //      7. Increment ready_counter
    64  //      8. Loop on wait_lock2 until it changes from zero
    65  // Stage 3
    66  //   BSP after ready_counter becomes == APs number
    67  //      10. Return to user
    68  // PROBLEM:
    69  //   NMI may crash the system if it comes before AP stack init done
    70  
    71  
    72  #define IA32_DEBUG_IO_PORT   0x80  // I/O port read by startap_stall() as a delay
        // Passed to startap_stall_using_tsc(), whose argument is in microseconds,
        // so despite the "MILIS" in the name this is 150000 usec (150 ms).
    73  #define INITIAL_WAIT_FOR_APS_TIMEOUT_IN_MILIS 150000
    74  
        // Forward declarations of the broadcast IPI helpers defined later in this file.
    75  void send_init_ipi(void);
    76  void send_broadcast_init_sipi(INIT32_STRUCT *p_init32_data);
    77  void send_ipi_to_all_excluding_self(uint32_t vector_number, 
    78                uint32_t delivery_mode);
    79  
    80  extern void ia32_read_msr(uint32_t msr_id, uint64_t *p_value);
        // TSC ticks per millisecond; 0 means "not calibrated yet" and makes
        // startap_stall_using_tsc() run the calibration on first use.
    81  static uint32_t startap_tsc_ticks_per_msec = 0;
    82  
    83  
        // Values of mp_bootstrap_state.  The AP assembly stub compares against the
        // literal 1, so MP_BOOTSTRAP_STATE_APS_ENUMERATED must stay 1.
    84  #define MP_BOOTSTRAP_STATE_INIT  0
    85  #define MP_BOOTSTRAP_STATE_APS_ENUMERATED 1
    86  
    87  
        // Written by the BSP (ap_initialize_environment, mp_set_bootstrap_state)
        // and polled by every AP in ap_continue_wakeup_code.
    88  static volatile uint32_t mp_bootstrap_state;
    89  
    90  // stage 1
        // Number of APs found; assigned from bsp_enumerate_aps() in ap_procs_startup().
    91  static uint32_t  g_aps_counter = 0;
    92  
    93  // stage 2
        // 6-byte images stored by sgdt/sidt in ap_procs_startup().  gp_IDT is
        // loaded by each AP with lidt; the matching lgdt of gp_GDT is commented
        // out in the AP stub.
    94  static uint8_t   gp_GDT[6] = {0};  // xx:xxxx
    95  static uint8_t   gp_IDT[6] = {0};  // xx:xxxx
    96  
        // Incremented once by each AP in ap_continue_wakeup_code_C();
        // ap_procs_run() spins until it reaches g_aps_counter.
    97  static volatile uint32_t        g_ready_counter = 0;
        // Function, and its opaque argument, that each AP calls once released.
    98  static FUNC_CONTINUE_AP_BOOT    g_user_func = 0;
    99  static void*                    g_any_data_for_user_func = 0;
   100  
   101  // 1 in i position means CPU[i] exists
        // The AP stub indexes this array with the local APIC ID;
        // bsp_enumerate_aps() later replaces each 1 with a running AP ordinal.
   102  static uint8_t ap_presence_array[VMM_MAX_CPU_SUPPORTED] = {0};  
        // Per-CPU initial stack pointers, defined elsewhere; the AP stub reads
        // the entry at index = local APIC ID.
   103  extern uint32_t evmm_stack_pointers_array[];
   104  
   105  
   106  // Layout of low memory page
   107  //   APStartUpCode  <--- low_mem
   108  //   GdtTable       <-- ALIGN16(low_mem+sizeof(APStartupCode))= loc_gdt
   109  //   GDT Register   <-- ALIGN16(loc_gdt+sizeof(gdt))= loc_gdtr
   110  
   111  // Uncomment the following line to deadloop in AP startup
   112  // #define BREAK_IN_AP_STARTUP
   113  const uint8_t APStartUpCode[] =
   114  {
   115  #ifdef BREAK_IN_AP_STARTUP
   116      0xEB, 0xFE,                    // jmp $
   117  #endif
   118      0x0f, 0x01, 0x16, 0x00, 0x00,  // 00: lgdt  GDTR
   119      0x0F, 0x20, 0xC0,              // 05: mov  eax,cr0
   120      0x0C, 0x33,                    // 08: or   al,1
   121      0x0F, 0x22, 0xC0,              // 10: mov  cr0,eax
   122      0x66, 0xEA,                    // 13: ljmp CS,CONT16
   123      0x00, 0x00, 0x00, 0x00,        // 15: CONT16
   124      0x00, 0x00,                    // 19: CS_VALUE
   125  //CONT16:
   126      0xFA,                          // 21: cli
   127      0x66, 0xB8, 0x00, 0x00,        // 22: mov  ax,DS_VALUE
   128      0x66, 0x8E, 0xD8,              // 26: mov  ds,ax
   129      0x66, 0xB8, 0x00, 0x00,        // 29: mov  ax,ES_VALUE
   130      0x66, 0x8E, 0xC0,              // 33: mov  es,ax
   131      0x66, 0xB8, 0x00, 0x00,        // 36: mov  ax,GS_VALUE
   132      0x66, 0x8E, 0xE8,              // 40: mov  gs,ax
   133      0x66, 0xB8, 0x00, 0x00,        // 43: mov  ax,FS_VALUE
   134      0x66, 0x8E, 0xE0,              // 47: mov  fs,ax
   135      0x66, 0xB8, 0x00, 0x00,        // 50: mov  ax,SS_VALUE
   136      0x66, 0x8E, 0xD0,              // 54: mov  ss,ax
   137      0xB8, 0x00, 0x00, 0x00, 0x00,  // 57: mov  eax,AP_CONTINUE_WAKEUP_CODE
   138      0xFF, 0xE0,                    // 62: jmp  eax
   139  };
   140  
   141  #ifdef BREAK_IN_AP_STARTUP
        // size of the "jmp $" that is prepended to APStartUpCode in this configuration
   142  #define AP_CODE_START                           2
   143  #else
   144  #define AP_CODE_START                           0
   145  #endif
   146  
        // Byte offsets, within APStartUpCode, of the operand fields that
        // setup_low_memory_ap_code() patches after copying the stub.
   147  #define GDTR_OFFSET_IN_CODE                      (3 + AP_CODE_START)
   148  #define CONT16_IN_CODE_OFFSET                   (15 + AP_CODE_START)
   149  #define CS_IN_CODE_OFFSET                       (19 + AP_CODE_START)
   150  #define DS_IN_CODE_OFFSET                       (24 + AP_CODE_START)
   151  #define ES_IN_CODE_OFFSET                       (31 + AP_CODE_START)
   152  #define GS_IN_CODE_OFFSET                       (38 + AP_CODE_START)
   153  #define FS_IN_CODE_OFFSET                       (45 + AP_CODE_START)
   154  #define SS_IN_CODE_OFFSET                       (52 + AP_CODE_START)
   155  #define AP_CONTINUE_WAKEUP_CODE_IN_CODE_OFFSET  (58 + AP_CODE_START)
   156  
        // Offset of the CONT16 label (the "cli" after the far jump); added to the
        // page address to form the far-jump target.
   157  #define CONT16_VALUE_OFFSET                     (21 + AP_CODE_START)
   158  
   159  
   160  void     ap_continue_wakeup_code(void);
   161  void     ap_continue_wakeup_code_C(uint32_t local_apic_id);
   162  uint8_t  bsp_enumerate_aps(void);
   163  void     ap_initialize_environment(void);
   164  void     mp_set_bootstrap_state(uint32_t new_state);
   165  
   166  
   167  uint64_t startap_rdtsc()
   168  {
   169      uint64_t  ret= 0;
   170      uint64_t  hi= 0;
   171      uint64_t  lo= 0;
   172  
   173      __asm__ volatile(
   174          "\trdtsc\n"
   175          "\tmovl  %%eax,%[lo]\n"
   176          "\tmovl  %%edx,%[hi]\n"
   177      : [hi] "=m" (hi), [lo] "=m" (lo)
   178      : :);
   179      ret= (hi<<32ULL)|lo;
   180      return ret;
   181  }
   182  
   183  
   184  // Addresses, inside the low-memory page, of the GDT copy (loc_gdt), of the
        // GDTR image that points at it (loc_gdtr), and of the 6 bytes following
        // that image (loc_idtr).  All three are computed by
        // setup_low_memory_ap_code(); the code that would store an IDTR image at
        // loc_idtr is commented out there.
   185  uint32_t    loc_gdt= 0;
   186  uint32_t    loc_gdtr= 0;
   187  uint32_t    loc_idtr= 0;
   188  
   189  
   190  // Setup AP low memory startup code
   191  void setup_low_memory_ap_code(uint32_t temp_low_memory_4K)
   192  {
   193      uint8_t*    code_to_patch = (uint8_t*)temp_low_memory_4K;
   194      uint32_t    end_page;
   195      UINT16      cs_sel= 0;
   196      UINT16      ds_sel= 0;
   197      UINT16      es_sel= 0;
   198      UINT16      gs_sel= 0;
   199      UINT16      fs_sel= 0;
   200      UINT16      ss_sel= 0;
   201  
   202  #ifdef JLMDEBUG
   203      bprint("setup_low_memory\n");
   204  #endif
   205      UNUSEDVAR(end_page);
   206  
   207      // zero low memory
   208      vmm_memset(code_to_patch, 0, 0x1000);
   209  
   210      // Copy the Startup code to the beginning of the page
   211      vmm_memcpy(code_to_patch, (const void*)APStartUpCode, sizeof(APStartUpCode));
   212  
   213      IA32_GDTR current_gdtr;
   214      //IA32_IDTR current_idtr;
   215      __asm__ volatile (
   216         "\tsgdt  %[current_gdtr]\n"
   217         // "\tsidt  %[current_idtr]\n"
   218      :[current_gdtr] "=m" (current_gdtr)// ,
   219       // [current_idtr] "=m" (current_idtr)
   220      ::);
   221  
   222      loc_gdt= (temp_low_memory_4K+sizeof(APStartUpCode)+15)&(uint32_t)0xfffffff0;
   223      loc_gdtr= (loc_gdt+current_gdtr.limit+16)&(uint32_t)0xfffffff0;
   224      loc_idtr= (loc_gdtr+6);
   225  
   226      // GDTR in page
   227      *(uint16_t*)loc_gdtr= current_gdtr.limit;
   228      *(uint32_t*)(loc_gdtr+2)= loc_gdt;
   229      // IDTR in page
   230      // *(uint16_t*)loc_idtr= current_idtr.limit;
   231      // *(uint32_t*)(loc_idtr+2)= current_idtr.base;
   232    
   233      cs_sel= ia32_read_cs();
   234      ds_sel= ia32_read_ds();
   235      ss_sel= ia32_read_ds();
   236      es_sel= ia32_read_ds();
   237      fs_sel= ia32_read_ds();
   238      gs_sel= ia32_read_ds();
   239  
   240      // Patch the startup code
   241      *((UINT16*)(code_to_patch+GDTR_OFFSET_IN_CODE))= (uint16_t) loc_gdtr;
   242      *((uint32_t*)(code_to_patch+CONT16_IN_CODE_OFFSET)) =
   243                                     (uint32_t)code_to_patch + CONT16_VALUE_OFFSET;
   244      *((UINT16*)(code_to_patch+CS_IN_CODE_OFFSET))= cs_sel;
   245      *((UINT16*)(code_to_patch+DS_IN_CODE_OFFSET))= ds_sel;
   246      *((UINT16*)(code_to_patch+ES_IN_CODE_OFFSET))= es_sel;
   247      *((UINT16*)(code_to_patch+GS_IN_CODE_OFFSET))= gs_sel;
   248      *((UINT16*)(code_to_patch+FS_IN_CODE_OFFSET))= fs_sel;
   249      *((UINT16*)(code_to_patch+SS_IN_CODE_OFFSET))= ss_sel;
   250      *((uint32_t*)(code_to_patch+AP_CONTINUE_WAKEUP_CODE_IN_CODE_OFFSET)) =
   251                                              (uint32_t)(ap_continue_wakeup_code);
   252  
   253      // GDT in page
   254      vmm_memcpy((uint8_t*)loc_gdt, (uint8_t*)current_gdtr.base, current_gdtr.limit+1);
   255  
   256  #if 0
   257      // this loops after the ljmp location
   258      // uint8_t* pnop= code_to_patch+CONT16_IN_CODE_OFFSET+6;
   259      // uint8_t* pnop= code_to_patch+57;
   260      uint8_t* pnop= code_to_patch+62;
   261      *(pnop++)= 0xeb; *(pnop++)= 0xfe; 
   262      // *(pnop++)= 0x90; *(pnop++)= 0x90; 
   263      //*(pnop++)= 0x90; *(pnop++)= 0x90; 
   264      //*(pnop++)= 0x90; *(pnop++)= 0x90; 
   265  #endif
   266  
   267  #ifdef JLMDEBUG
   268      end_page= loc_idtr+6;
   269      bprint("code_to_patch: %p, ljmp offset offset: 0x%08x, address: %p\n",  
   270             code_to_patch, CONT16_VALUE_OFFSET, code_to_patch+CONT16_VALUE_OFFSET);
   271      bprint("cs_sel: 0x%04x, ", cs_sel);
   272      bprint("ds_sel: 0x%04x, ", ds_sel);
   273      bprint("ss_sel: 0x%04x\n", ss_sel);
   274      bprint("loc_gdt: 0x%08x, loc_gdtr: 0x%08x, base: 0x%08x, limit: 0x%04x\n",
   275              loc_gdt, loc_gdtr, loc_gdt, current_gdtr.limit);
   276      HexDump(code_to_patch, (uint8_t*)end_page);
   277      // LOOP_FOREVER
   278  #endif
   279      return;
   280  }
   281  
   282  
   283  // Initial AP setup in protected mode - should never return
   284  void ap_continue_wakeup_code_C(uint32_t local_apic_id)
   285  {
   286  #ifdef JLMDEBUG
   287     bprint("ap_continue_wakeup_code_C 0x%08x\n", local_apic_id);
   288  #endif
   289      // mark that the command was accepted
   290      __asm__ volatile (
   291          "\tlock; incl %[g_ready_counter]\n"
   292      : [g_ready_counter] "=m" (g_ready_counter)
   293      ::);
   294  #ifdef JLMDEBUG
   295     bprint("calling user function\n");
   296  #endif
   297      if(g_user_func==0) {
   298          bprint("null user function in ap_continue_wakeup_code_C\n");
   299          LOOP_FOREVER
   300      }
   301      // user_func now contains address of the function to be called
   302      g_user_func(local_apic_id, g_any_data_for_user_func);
   303      return;
   304  }
   305  
   306  
   307  __asm__(
   308  ".text\n"
   309  ".globl ap_continue_wakeup_code\n"
   310  ".type ap_continue_wakeup_code,@function\n"
   311  "ap_continue_wakeup_code:\n"
   312          "\tcli\n"
   313          // get the Local APIC ID
   314          // IA32_MSR_APIC_BASE= 0x01B
   315          "\tmov  $0x01B, %ecx\n"
   316          "\trdmsr\n"
   317          // LOCAL_APIC_BASE_MSR_MASK= $0xfffff000
   318          "\tand   $0xfffff000, %eax\n"
   319          // LOCAL_APIC_IDENTIFICATION_OFFSET= 0x20
   320          "\tmov   0x20(%eax), %ecx\n"
   321          // LOCAL_APIC_ID_LOW_RESERVED_BITS_COUNT= 24
   322          "\tshr   $24, %ecx\n"
   323  
   324          // edx <- address of presence array
   325          "\tlea   ap_presence_array, %edx\n"
   326          "\tadd   %ecx, %edx\n"
   327          // mark current CPU as present
   328          "\tmovb  $1, (%edx)\n"
   329          // last debug place
   330  "1:\n"
   331          // MP_BOOTSTRAP_STATE_APS_ENUMERATED= 1
   332          "\tcmpl  $1, mp_bootstrap_state\n"
   333          "\tje    2f\n"
   334          "\tpause\n"
   335          "\tjmp   1b\n"
   336  
   337          // stage 2 - setup the stack, GDT, IDT and jump to "C"
   338  "2:\n"
   339  
   340          // find my stack. My stack offset is in the array 
   341          // ecx contains CPU ID
   342          // point edx to right stack
   343          "\tleal  evmm_stack_pointers_array, %edx\n"
   344          "\tmovl   %ecx, %eax\n"
   345          "\tshl    $2, %eax\n"
   346          "\taddl   %eax, %edx\n"
   347          "\tmov   (%edx), %esp\n"
   348  
   349          // setup GDT
   350          //"\tlgdt  gp_GDT\n"
   351  
   352          // setup IDT
   353          "\tlidt gp_IDT\n"
   354  
   355          // align stack
   356          "\tand   $0xfffffff0, %esp\n"
   357  
   358          // enter "C" function
   359          //  push  AP ordered ID
   360          "\tmov    %ecx, %edi\n"
   361          "\tmov    %ecx, %esi\n"
   362          "\tpush   %ecx\n"
   363          // should never return
   364          "\tcall  ap_continue_wakeup_code_C\n"
   365  );
   366  
   367  
   368  static uint8_t read_port_8(uint32_t port)
   369  {
   370      uint8_t out;
   371      __asm__ volatile (
   372          "\tmovl %[port],%%edx\n"
   373          "\txorl %%eax, %%eax\n"
   374          "\tin   %%dx, %%al\n"
   375          "\tmovb %%al, %[out]\n"
   376      : [out] "=m" (out)
   377      : [port] "m" (port)
   378      :"%edx", "%eax");
   379      return out;
   380  }
   381  
   382  
   383  // Stall (busy loop) for a given time, using the platform's speaker port
   384  // h/w.  Should only be called at initialization, since a guest OS may
   385  // change the platform setting.
   386  void startap_stall(uint32_t stall_usec)
   387  {
   388      uint32_t   c= 0;
   389      for(c= 0; c<stall_usec; c++)
   390          read_port_8(IA32_DEBUG_IO_PORT);
   391      return;
   392  }
   393  
   394  
   395  // Calibrate the internal variable with number of TSC ticks pers second.
   396  // Should only be called at initialization, as it relies on startap_stall()
   397  void startap_calibrate_tsc_ticks_per_msec(void)
   398  {
   399      uint64_t start_tsc, end_tsc;
   400  
   401      start_tsc= startap_rdtsc();
   402      startap_stall(1000);   // 1 ms
   403      end_tsc= startap_rdtsc();
   404      startap_tsc_ticks_per_msec= (uint32_t)(end_tsc-start_tsc);
   405  #ifdef JLMDEBUG
   406      bprint("ticks/ms: %d\n", startap_tsc_ticks_per_msec);
   407  #endif
   408      return;
   409  }
   410  
   411  
   412  // Stall (busy loop) for a given time, using the CPU TSC register.
   413  // Note that, depending on the CPU and ASCI modes, the stall accuracy 
   414  // may be rough.
   415  void startap_stall_using_tsc(uint32_t stall_usec)
   416  {
   417      uint64_t   start_tsc, end_tsc;
   418  
   419      // Initialize startap_tsc_ticks_per_msec. Happens at boot time
   420      if(startap_tsc_ticks_per_msec == 0) {
   421          startap_calibrate_tsc_ticks_per_msec();
   422      }
   423      // Calculate the start_tsc and end_tsc
   424      start_tsc = startap_rdtsc();
   425      end_tsc= startap_rdtsc()+(((uint64_t)stall_usec/1000)*
   426                               (uint64_t)startap_tsc_ticks_per_msec);
   427      while (start_tsc<end_tsc) {
   428          __asm__ volatile (
   429              "\tpause\n"
   430          :::);
   431          start_tsc = startap_rdtsc();
   432      }
   433      return;
   434  }
   435  
   436  
   437  void send_ipi_to_specific_cpu (uint32_t vector_number, 
   438                              uint32_t delivery_mode, uint8_t dst)
   439  {
   440      IA32_ICR_LOW           icr_low;
   441      IA32_ICR_LOW           icr_low_status;
   442      IA32_ICR_HIGH          icr_high;
   443      UINT64                 apic_base = 0;
   444  
   445  #ifdef JLMDEBUG
   446      bprint("send_ipi_to_specific_cpu (%d, %d, %d)\n",
   447                   vector_number, delivery_mode, dst);
   448  #endif
   449      vmm_memset(&icr_low, 0, sizeof(IA32_ICR_LOW));
   450      vmm_memset(&icr_low_status, 0, sizeof(IA32_ICR_LOW));
   451      vmm_memset(&icr_high, 0, sizeof(IA32_ICR_HIGH));
   452      icr_low.bits.vector= vector_number;
   453      icr_low.bits.delivery_mode = delivery_mode;
   454  
   455      //    level is set to 1 (except for INIT_DEASSERT)
   456      //    trigger mode is set to 0 (except for INIT_DEASSERT)
   457      icr_low.bits.level = 1;
   458      icr_low.bits.trigger_mode = 0;
   459  
   460      // send to specific cpu
   461      icr_low.bits.destination_shorthand = LOCAL_APIC_BROADCAST_MODE_SPECIFY_CPU;
   462      icr_high.bits.destination = dst;
   463  
   464      // send
   465      ia32_read_msr(IA32_MSR_APIC_BASE, &apic_base);
   466      apic_base&= LOCAL_APIC_BASE_MSR_MASK;
   467  #ifdef JLMDEBUG
   468      bprint("about to call do loop base: %llx %x\n", apic_base, LOCAL_APIC_ICR_OFFSET);
   469  #endif
   470  
   471      do {
   472          *(uint32_t*)&icr_low_status= *(uint32_t*)(uint32_t)
   473                             (apic_base+LOCAL_APIC_ICR_OFFSET);
   474      } while (icr_low_status.bits.delivery_status!=0);
   475  
   476  #ifdef JLMDEBUG
   477      bprint("vector: 0x%04x, ", vector_number);
   478      bprint("delivery: 0x%04x\n", delivery_mode);
   479      bprint("shorthand: 0x%04x, ", LOCAL_APIC_BROADCAST_MODE_SPECIFY_CPU);
   480      bprint("pointer hi: 0x%08x, ", *(uint32_t*)&icr_high);
   481      bprint("low: 0x%08x\n", *(uint32_t*)&icr_low);
   482  #endif
   483      *(uint32_t*)(uint32_t)(apic_base+LOCAL_APIC_ICR_OFFSET_HIGH)= 
   484                  *(uint32_t*)&icr_high;
   485      *(uint32_t*)(uint32_t)(apic_base+LOCAL_APIC_ICR_OFFSET)= *(uint32_t*)&icr_low;
   486      do {
   487          startap_stall_using_tsc(10000);
   488          *(uint32_t*)&icr_low_status= *(uint32_t*)(uint32_t)
   489                      (apic_base+LOCAL_APIC_ICR_OFFSET);
   490      } while (icr_low_status.bits.delivery_status!=0);
   491  #ifdef JLMDEBUG
   492      bprint("send_ipi_to_specific_cpu returning\n");
   493  #endif
   494      return;
   495  }
   496  
   497  // Send INIT IPI - SIPI to all active APs
   498  void send_targeted_init_sipi(struct _INIT32_STRUCT *p_init32_data,
   499                                      VMM_STARTUP_STRUCT *p_startup)
   500  {
   501      int i;
   502          
   503  #ifdef JLMDEBUG
   504      bprint("send_targeted_init_sipi, %d procs at boot.  apic ids: \n",
   505             p_startup->number_of_processors_at_boot_time);
   506      for(i=0; i<p_startup->number_of_processors_at_boot_time; i++)
   507          bprint(" %d", p_startup->cpu_local_apic_ids[i]);
   508      bprint("\n");
   509  #endif
   510      for (i = 0; i < p_startup->number_of_processors_at_boot_time - 1; i++) {
   511          send_ipi_to_specific_cpu(0, LOCAL_APIC_DELIVERY_MODE_INIT, 
   512                                   p_startup->cpu_local_apic_ids[i+1]);
   513      }
   514      startap_stall_using_tsc(10000); // timeout - 10 milliseconds
   515  
   516      // SIPI message contains address of the code, shifted right to 12 bits
   517      // send it twice - according to manual
   518      for (i= 0; i<p_startup->number_of_processors_at_boot_time-1; i++) {
   519          send_ipi_to_specific_cpu(((uint32_t)p_init32_data->i32_low_memory_page)>>12, 
   520                  LOCAL_APIC_DELIVERY_MODE_SIPI, p_startup->cpu_local_apic_ids[i+1]);
   521      }
   522      startap_stall_using_tsc(200000); // timeout - 200 miliseconds
   523      for (i = 0; i < p_startup->number_of_processors_at_boot_time - 1; i++) {
   524          send_ipi_to_specific_cpu(((uint32_t)p_init32_data->i32_low_memory_page)>>12, 
   525              LOCAL_APIC_DELIVERY_MODE_SIPI, p_startup->cpu_local_apic_ids[i+1]);
   526      }
   527      startap_stall_using_tsc(200000); // timeout - 200 miliseconds
   528  }
   529  
   530  
   531  // Start all APs in pre-os launch and only active APs in post-os launch and 
   532  // bring them to protected non-paged mode.
   533  // Processors are left in the state were they wait for continuation signal
   534  //    p_init32_data - contains pointer to the free low memory page to be used
   535  //                    for bootstap. After the return this memory is free
   536  //    p_startup - local apic ids of active cpus used post-os launch
   537  //  Return:
   538  //    number of processors that were init (not including BSP)
   539  //    or -1 on errors
   540  uint32_t ap_procs_startup(struct _INIT32_STRUCT *p_init32_data, 
   541                            VMM_STARTUP_STRUCT *p_startup)
   542  {
   543  #ifdef JLMDEBUG
   544      bprint("ap_procs_startup init32 data: %p, startup: %p\n", p_init32_data, p_startup);
   545  #endif
   546      if(NULL==p_init32_data || 0 == p_init32_data->i32_low_memory_page) {
   547          return (uint32_t)(-1);
   548      }
   549  
   550      // Stage 1 
   551      ap_initialize_environment();
   552  #ifdef JLMDEBUG
   553      bprint("back from ap_initialize_environment()\n");
   554  #endif
   555  
   556      // save IDT and GDT
   557      __asm__ volatile (
   558          "\tsgdt %[gp_GDT]\n"
   559          "\tsidt %[gp_IDT]\n"
   560      : [gp_GDT] "=m" (gp_GDT), [gp_IDT] "=m" (gp_IDT)
   561      ::);
   562  
   563      // create AP startup code in low memory
   564      setup_low_memory_ap_code(p_init32_data->i32_low_memory_page);
   565      send_broadcast_init_sipi(p_init32_data);
   566  
   567      // wait for predefined timeout
   568      startap_stall_using_tsc(INITIAL_WAIT_FOR_APS_TIMEOUT_IN_MILIS);
   569  
   570      // Stage 2 
   571  #ifdef JLMDEBUG
   572      bprint("About to call bsp_enumerate_aps\n");
   573  #endif
   574      g_aps_counter= bsp_enumerate_aps();
   575  #ifdef JLMDEBUG
   576      bprint("g_aps_counter: %d\n", g_aps_counter);
   577      bprint("gdt limit: %d, gdt base: 0x%08x\n", *(uint16_t*)(&gp_GDT[0]),
   578              *(uint32_t*)(&gp_GDT[2]));
   579      bprint("idt limit: %d, idt base: 0x%08x\n", *(uint16_t*)(&gp_IDT[0]),
   580              *(uint32_t*)(&gp_IDT[2]));
   581      bprint("mp_bootstrap_state: %d\n", mp_bootstrap_state);
   582      for(int i=0; i<4; i++) {
   583          bprint("stack[%d]= 0x%08x, ", i, evmm_stack_pointers_array[i]);
   584      }
   585      bprint("\n");
   586  #endif
   587  #ifdef JLMDEBUG
   588      extern int evmm_num_of_aps;
   589      bprint("stage 2, num aps: %d, num aps: %d\n", g_aps_counter, evmm_num_of_aps);
   590      for(int i=0; i<4; i++) {
   591          bprint("presence[%d]= %d, ", i, ap_presence_array[i]);
   592      }
   593      bprint("\n");
   594  #endif
   595      return g_aps_counter;
   596  }
   597  
   598  
   599  typedef struct {   // parameter type named in the start_application() declaration inside ap_procs_run()
        // NOTE(review): the meaning of the fields is not visible in this file.
   600      void*           any_data1;
   601      void*           any_data2;
   602      void*           any_data3;
   603      UINT64          ep;   // presumably an entry point - TODO confirm
   604  } APPLICATION_PARAMS_STRUCT;
   605  
   606  
   607  // Run user specified function on all APs.
   608  // If user function returns it should return in the protected 32bit mode. In this
   609  // case APs enter the wait state once more.
   610  //  continue_ap_boot_func - user given function to continue AP boot
   611  //  any_data - data to be passed to the function
   612  void ap_procs_run(FUNC_CONTINUE_AP_BOOT continue_ap_boot_func, void *any_data)
   613  {
   614  #ifdef JLMDEBUG
   615      extern void start_application(uint32_t cpu_id, 
   616                    const APPLICATION_PARAMS_STRUCT *params);
   617      bprint("ap_procs_run function: %p, ready counter: %d, aps counter: %d\n", 
   618             continue_ap_boot_func, g_ready_counter, g_aps_counter);
   619      bprint("start_application: %p\n", start_application);
   620  #endif
   621      g_user_func= continue_ap_boot_func;
   622      g_any_data_for_user_func= any_data;
   623  
   624      // signal to APs to pass to the next stage
   625      mp_set_bootstrap_state(MP_BOOTSTRAP_STATE_APS_ENUMERATED);
   626  
   627      // wait until all APs accept this
   628      while (g_ready_counter<g_aps_counter) {
   629          __asm__ volatile ( "\tpause\n" :::);
   630      }
   631  #ifdef JLMDEBUG
   632      bprint("ap_procs_run function returning %d %d\n",
   633             g_ready_counter, g_aps_counter);
   634  #endif
   635      return;
   636  }
   637  
   638  
   639  // Function  : bsp_enumerate_aps
   640  // Purpose   : Walk through ap_presence_array and count discovered APs.
   641  //           : and modifies array thus it will contain AP IDs,
   642  //           : and not just 1/0.
   643  // Return    : Total number of APs, discovered till now.
   644  // Notes     : Should be called on BSP
   645  uint8_t bsp_enumerate_aps(void)
   646  {
   647      int       i;
   648      uint8_t   ap_num = 0;
   649  
   650      for (i = 1; i<NELEMENTS(ap_presence_array); ++i) {
   651          if (0 != ap_presence_array[i]) {
   652              ap_presence_array[i] = ++ap_num;
   653          }
   654      }
   655      return ap_num;
   656  }
   657  
   658  
   659  void ap_initialize_environment(void)
   660  {
   661      mp_bootstrap_state = MP_BOOTSTRAP_STATE_INIT;
   662      g_ready_counter = 0;
   663      g_user_func = 0;
   664      g_any_data_for_user_func = 0;
   665  }
   666  
   667  
   668  void mp_set_bootstrap_state(uint32_t new_state)
   669  {
   670  #ifdef JLMDEBUG
   671      bprint("mp_set_bootstrap_state %d\n", new_state);
   672  #endif
   673      __asm__ volatile (
   674          "\tpush  %%eax\n"
   675          "\tmovl  %[new_state], %%eax\n"
   676          "\tlock; xchgl %%eax, %[mp_bootstrap_state]\n"
   677          "\tpopl  %%eax\n"
   678      : [mp_bootstrap_state] "=m" (mp_bootstrap_state)
   679      : [new_state] "g" (new_state)
   680      : "%eax");
   681      return;
   682  }
   683  
   684  
   685  void send_init_ipi(void)
   686  {
   687      send_ipi_to_all_excluding_self(0, LOCAL_APIC_DELIVERY_MODE_INIT);
   688  }
   689  
   690  void send_sipi_ipi(void* code_start)
   691  {
   692  #ifdef JLMDEBUG
   693      bprint("send_ipi_to_all_excluding_self\n");
   694  #endif
   695      // SIPI message contains address of the code, shifted right to 12 bits
   696      send_ipi_to_all_excluding_self(((uint32_t)code_start)>>12, 
   697          LOCAL_APIC_DELIVERY_MODE_SIPI);
   698  }
   699  
   700  
   701  // Send INIT IPI - SIPI to all APs in broadcast mode
   702  void send_broadcast_init_sipi(INIT32_STRUCT *p_init32_data)
   703  {
   704  #ifdef JLMDEBUG
   705      bprint("send_broadcast_init_sipi\n");
   706  #endif
   707      send_init_ipi();
   708      startap_stall_using_tsc(10000); // timeout - 10 milliseconds
   709  
   710  #ifdef JLMDEBUG
   711      bprint("back from stall\n");
   712  #endif
   713      // SIPI message contains address of the code, shifted right to 12 bits
   714      // send it twice - according to manual
   715      send_sipi_ipi((void *)p_init32_data->i32_low_memory_page);
   716      startap_stall_using_tsc(200000); // timeout - 200 milliseconds
   717  #ifdef JLMDEBUG
   718      bprint("back from first send_sipi_ipi\n");
   719  #endif
   720      send_sipi_ipi((void*)p_init32_data->i32_low_memory_page);
   721      startap_stall_using_tsc(200000); // timeout - 200 milliseconds
   722  #ifdef JLMDEBUG
   723      bprint("back from second send_sipi_ipi\n");
   724  #endif
   725  }
   726  
   727  
   728  // send IPI
   729  void send_ipi_to_all_excluding_self(uint32_t vector_number, 
   730                     uint32_t delivery_mode)
   731  {
   732      IA32_ICR_LOW           icr_low;
   733      IA32_ICR_LOW           icr_low_status;
   734      IA32_ICR_HIGH          icr_high;
   735      UINT64                 apic_base = 0;
   736  
   737  #ifdef JLMDEBG1
   738      bprint("send_ipi_to_all_excluding_self(%d, %d)\n",
   739                   vector_number, delivery_mode);
   740  #endif
   741      vmm_memset(&icr_low, 0, sizeof(IA32_ICR_LOW));
   742      vmm_memset(&icr_low_status, 0, sizeof(IA32_ICR_LOW));
   743      vmm_memset(&icr_high, 0, sizeof(IA32_ICR_HIGH));
   744      icr_low.bits.vector= vector_number;
   745      icr_low.bits.delivery_mode = delivery_mode;
   746  
   747      // level is set to 1 (except for INIT_DEASSERT, 
   748      //   which is not supported in P3 and P4)
   749      // trigger mode is set to 0 (except for INIT_DEASSERT)
   750      icr_low.bits.level = 1;
   751      icr_low.bits.trigger_mode = 0;
   752  
   753      // broadcast mode - ALL_EXCLUDING_SELF
   754      icr_low.bits.destination_shorthand = 
   755              LOCAL_APIC_BROADCAST_MODE_ALL_EXCLUDING_SELF;
   756  
   757      // send
   758      ia32_read_msr(IA32_MSR_APIC_BASE, &apic_base);
   759      apic_base &= LOCAL_APIC_BASE_MSR_MASK;
   760      do {
   761          *(uint32_t*)&icr_low_status= *(uint32_t*)(uint32_t)
   762                      (apic_base+LOCAL_APIC_ICR_OFFSET);
   763      } while (icr_low_status.bits.delivery_status!=0);
   764  
   765  #ifdef JLMDEBUG
   766      bprint("vector: 0x%04x, ", vector_number);
   767      bprint("mode: 0x%04x, ", delivery_mode);
   768      bprint("pointer hi: 0x%08x, ", *(uint32_t*)&icr_high);
   769      bprint("low: 0x%08x\n", *(uint32_t*)&icr_low);
   770  #endif
   771      *(uint32_t*)(uint32_t)(apic_base+LOCAL_APIC_ICR_OFFSET_HIGH)=
   772                  *(uint32_t*)&icr_high;
   773      *(uint32_t*)(uint32_t)(apic_base+LOCAL_APIC_ICR_OFFSET)= 
   774                  *(uint32_t*)&icr_low;
   775  
   776      do {
   777          startap_stall_using_tsc(10000);
   778          *(uint32_t*)&icr_low_status= *(uint32_t*)(uint32_t)
   779                  (apic_base+LOCAL_APIC_ICR_OFFSET);
   780      } while (icr_low_status.bits.delivery_status!=0);
   781  #ifdef JLMDEBUG
   782      bprint("returning from send_ipi_to_all_excluding_self\n");
   783  #endif
   784      return;
   785  }
   786