github.com/goccy/go-jit@v0.0.0-20200514131505-ff78d45cf6af/internal/ccall/jit-rules-x86-64.c

/*
 * jit-rules-x86-64.c - Rules that define the characteristics of the x86_64.
 *
 * Copyright (C) 2008  Southern Storm Software, Pty Ltd.
 *
 * This file is part of the libjit library.
 *
 * The libjit library is free software: you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * The libjit library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the libjit library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include "jit-internal.h"
#include "jit-rules.h"
#include "jit-apply-rules.h"

#if defined(JIT_BACKEND_X86_64)

#include "jit-gen-x86-64.h"
#include "jit-reg-alloc.h"
#include "jit-setjmp.h"
#include <stdio.h>

/*
 * Pseudo register numbers for the x86_64 registers.  These are not the
 * same as the CPU instruction register numbers.  The order of these
 * values must match the order in "JIT_REG_INFO".
 */
#define X86_64_REG_RAX		0
#define X86_64_REG_RCX		1
#define X86_64_REG_RDX		2
#define X86_64_REG_RBX		3
#define X86_64_REG_RSI		4
#define X86_64_REG_RDI		5
#define X86_64_REG_R8		6
#define X86_64_REG_R9		7
#define X86_64_REG_R10		8
#define X86_64_REG_R11		9
#define X86_64_REG_R12		10
#define X86_64_REG_R13		11
#define X86_64_REG_R14		12
#define X86_64_REG_R15		13
#define X86_64_REG_RBP		14
#define X86_64_REG_RSP		15
#define X86_64_REG_XMM0		16
#define X86_64_REG_XMM1		17
#define X86_64_REG_XMM2		18
#define X86_64_REG_XMM3		19
#define X86_64_REG_XMM4		20
#define X86_64_REG_XMM5		21
#define X86_64_REG_XMM6		22
#define X86_64_REG_XMM7		23
#define X86_64_REG_XMM8		24
#define X86_64_REG_XMM9		25
#define X86_64_REG_XMM10	26
#define X86_64_REG_XMM11	27
#define X86_64_REG_XMM12	28
#define X86_64_REG_XMM13	29
#define X86_64_REG_XMM14	30
#define X86_64_REG_XMM15	31
#define X86_64_REG_ST0		32
#define X86_64_REG_ST1		33
#define X86_64_REG_ST2		34
#define X86_64_REG_ST3		35
#define X86_64_REG_ST4		36
#define X86_64_REG_ST5		37
#define X86_64_REG_ST6		38
#define X86_64_REG_ST7		39

/*
 * Determine if a pseudo register number is general, xmm or fpu.
 */
#define IS_GENERAL_REG(reg)	(((reg) & ~0x0f) == 0)
#define IS_XMM_REG(reg)		(((reg) & ~0x0f) == 0x10)
#define IS_FPU_REG(reg)		(((reg) & ~0x0f) == 0x20)

/*
 * Scratch register that is used for calls via register and
 * for loading the exception pc into the setjmp buffer.
 * This register *MUST* not be used for parameter passing and
 * *MUST* not be a callee saved register.
 * For the SysV ABI, R11 is perfect.
 */
#define X86_64_SCRATCH X86_64_R11
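/*
 * Illustrative note (not part of the original source): the pseudo
 * register numbers above are grouped in banks of 16, so the IS_*_REG
 * macros only need to test the high nibble.  For example,
 * X86_64_REG_XMM3 == 19 == 0x13 gives (0x13 & ~0x0f) == 0x10, making
 * IS_XMM_REG() true and the other two false.
 */
#if 0
/* Hypothetical helper, for illustration only */
static const char *
_example_reg_bank(int reg)
{
	if(IS_GENERAL_REG(reg))		/* 0x00 .. 0x0f: RAX .. RSP */
	{
		return "general";
	}
	else if(IS_XMM_REG(reg))	/* 0x10 .. 0x1f: XMM0 .. XMM15 */
	{
		return "xmm";
	}
	else if(IS_FPU_REG(reg))	/* 0x20 .. 0x27: ST0 .. ST7 */
	{
		return "fpu";
	}
	return "invalid";
}
#endif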
/*
 * Set this definition to 1 if the OS supports the SysV red zone.
 * This is a 128 byte area below the stack pointer that is guaranteed
 * not to be modified by interrupts or signal handlers.
 * This allows us to use a temporary area on the stack without
 * having to modify the stack pointer, saving us two instructions.
 * TODO: Make this a configure switch.
 */
#define HAVE_RED_ZONE 1

/*
 * Some declarations that should be replaced by querying cpuinfo
 * when generating code for the current CPU.
 */
/*
#define HAVE_X86_SSE_4_1 0
#define HAVE_X86_SSE_4 0
#define HAVE_X86_SSE_3 0
#define HAVE_X86_FISTTP 0
*/

#define TODO() \
	do { \
		fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \
	} while(0)

/*
 * Setup or teardown the x86 code output process.
 */
#define jit_cache_setup_output(needed) \
	unsigned char *inst = gen->ptr; \
	_jit_gen_check_space(gen, (needed))

#define jit_cache_end_output() \
	gen->ptr = inst

/*
 * Set this to 1 for debugging fixups
 */
#define DEBUG_FIXUPS 0

/*
 * The maximum block size copied inline
 */
#define _JIT_MAX_MEMCPY_INLINE 0x40

/*
 * The maximum block size set inline
 */
#define _JIT_MAX_MEMSET_INLINE 0x80

/*
 * va_list type as specified in x86_64 sysv abi version 0.99
 * Figure 3.34
 */
typedef struct
{
	unsigned int gp_offset;
	unsigned int fp_offset;
	void *overflow_arg_area;
	void *reg_save_area;
} _jit_va_list;
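/*
 * Illustrative sketch (not part of the original source), following the
 * SysV AMD64 va_arg algorithm: an INTEGER class argument is fetched
 * from reg_save_area as long as gp_offset is below 48 (6 registers *
 * 8 bytes), otherwise from overflow_arg_area.
 */
#if 0
static void *
_example_va_arg_word(_jit_va_list *va)
{
	void *arg;

	if(va->gp_offset < 6 * 8)
	{
		/* The argument was passed in a general purpose register */
		arg = (char *)va->reg_save_area + va->gp_offset;
		va->gp_offset += 8;
	}
	else
	{
		/* The argument was passed on the stack */
		arg = va->overflow_arg_area;
		va->overflow_arg_area = (char *)va->overflow_arg_area + 8;
	}
	return arg;
}
#endif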
/* Registers used for INTEGER arguments */
static int _jit_word_arg_regs[] = {X86_64_REG_RDI, X86_64_REG_RSI,
				   X86_64_REG_RDX, X86_64_REG_RCX,
				   X86_64_REG_R8, X86_64_REG_R9};
#define _jit_num_word_regs	6

/* Registers used for float arguments */
static int _jit_float_arg_regs[] = {X86_64_REG_XMM0, X86_64_REG_XMM1,
				    X86_64_REG_XMM2, X86_64_REG_XMM3,
				    X86_64_REG_XMM4, X86_64_REG_XMM5,
				    X86_64_REG_XMM6, X86_64_REG_XMM7};
#define _jit_num_float_regs	8

/* Registers used for returning INTEGER values */
static int _jit_word_return_regs[] = {X86_64_REG_RAX, X86_64_REG_RDX};
#define _jit_num_word_return_regs	2

/* Registers used for returning sse values */
static int _jit_sse_return_regs[] = {X86_64_REG_XMM0, X86_64_REG_XMM1};
#define _jit_num_sse_return_regs	2

/*
 * X86_64 register classes
 */
static _jit_regclass_t *x86_64_reg;	/* X86_64 general purpose registers */
static _jit_regclass_t *x86_64_creg;	/* X86_64 call clobbered general */
					/* purpose registers */
static _jit_regclass_t *x86_64_dreg;	/* general purpose registers that */
					/* can be used as divisor */
					/* (all but %rax and %rdx) */
static _jit_regclass_t *x86_64_rreg;	/* general purpose registers not used */
					/* for returning values */
static _jit_regclass_t *x86_64_sreg;	/* general purpose registers that can */
					/* be used for the value to be */
					/* shifted (all but %rcx) */
static _jit_regclass_t *x86_64_freg;	/* X86_64 fpu registers */
static _jit_regclass_t *x86_64_xreg;	/* X86_64 xmm registers */

void
_jit_init_backend(void)
{
	x86_64_reg = _jit_regclass_create(
		"reg", JIT_REG_WORD | JIT_REG_LONG, 14,
		X86_64_REG_RAX, X86_64_REG_RCX,
		X86_64_REG_RDX, X86_64_REG_RBX,
		X86_64_REG_RSI, X86_64_REG_RDI,
		X86_64_REG_R8, X86_64_REG_R9,
		X86_64_REG_R10, X86_64_REG_R11,
		X86_64_REG_R12, X86_64_REG_R13,
		X86_64_REG_R14, X86_64_REG_R15);

	/* register class with all call clobbered registers */
	x86_64_creg = _jit_regclass_create(
		"creg", JIT_REG_WORD | JIT_REG_LONG, 9,
		X86_64_REG_RAX, X86_64_REG_RCX,
		X86_64_REG_RDX, X86_64_REG_RSI,
		X86_64_REG_RDI, X86_64_REG_R8,
		X86_64_REG_R9, X86_64_REG_R10,
		X86_64_REG_R11);

	/* register class for divisors */
	x86_64_dreg = _jit_regclass_create(
		"dreg", JIT_REG_WORD | JIT_REG_LONG, 12,
		X86_64_REG_RCX, X86_64_REG_RBX,
		X86_64_REG_RSI, X86_64_REG_RDI,
		X86_64_REG_R8, X86_64_REG_R9,
		X86_64_REG_R10, X86_64_REG_R11,
		X86_64_REG_R12, X86_64_REG_R13,
		X86_64_REG_R14, X86_64_REG_R15);

	/* register class with all registers not used for returning values */
	x86_64_rreg = _jit_regclass_create(
		"rreg", JIT_REG_WORD | JIT_REG_LONG, 12,
		X86_64_REG_RCX, X86_64_REG_RBX,
		X86_64_REG_RSI, X86_64_REG_RDI,
		X86_64_REG_R8, X86_64_REG_R9,
		X86_64_REG_R10, X86_64_REG_R11,
		X86_64_REG_R12, X86_64_REG_R13,
		X86_64_REG_R14, X86_64_REG_R15);

	/* register class with all registers that can be used for shifted values */
	x86_64_sreg = _jit_regclass_create(
		"sreg", JIT_REG_WORD | JIT_REG_LONG, 13,
		X86_64_REG_RAX, X86_64_REG_RDX,
		X86_64_REG_RBX, X86_64_REG_RSI,
		X86_64_REG_RDI, X86_64_REG_R8,
		X86_64_REG_R9, X86_64_REG_R10,
		X86_64_REG_R11, X86_64_REG_R12,
		X86_64_REG_R13, X86_64_REG_R14,
		X86_64_REG_R15);

	x86_64_freg = _jit_regclass_create(
		"freg", JIT_REG_X86_64_FLOAT | JIT_REG_IN_STACK, 8,
		X86_64_REG_ST0, X86_64_REG_ST1,
		X86_64_REG_ST2, X86_64_REG_ST3,
		X86_64_REG_ST4, X86_64_REG_ST5,
		X86_64_REG_ST6, X86_64_REG_ST7);

	x86_64_xreg = _jit_regclass_create(
		"xreg", JIT_REG_FLOAT32 | JIT_REG_FLOAT64, 16,
		X86_64_REG_XMM0, X86_64_REG_XMM1,
		X86_64_REG_XMM2, X86_64_REG_XMM3,
		X86_64_REG_XMM4, X86_64_REG_XMM5,
		X86_64_REG_XMM6, X86_64_REG_XMM7,
		X86_64_REG_XMM8, X86_64_REG_XMM9,
		X86_64_REG_XMM10, X86_64_REG_XMM11,
		X86_64_REG_XMM12, X86_64_REG_XMM13,
		X86_64_REG_XMM14, X86_64_REG_XMM15);
}

int
_jit_opcode_is_supported(int opcode)
{
	switch(opcode)
	{
#define JIT_INCLUDE_SUPPORTED
#include "jit-rules-x86-64.inc"
#undef JIT_INCLUDE_SUPPORTED
	}
	return 0;
}

int
_jit_setup_indirect_pointer(jit_function_t func, jit_value_t value)
{
	return jit_insn_outgoing_reg(func, value, X86_64_REG_R11);
}
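/*
 * Illustrative note (not part of the original source): the constant
 * helpers below place data in the code cache and address it
 * RIP-relative.  A RIP-relative displacement is relative to the
 * address of the *following* instruction, so the instruction length is
 * subtracted when computing it; the "(reg > 7 ? 9 : 8)" terms account
 * for the extra REX prefix byte needed to encode XMM8..XMM15.
 */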
/*
 * Do an xmm operation with a constant float32 value
 */
static int
_jit_xmm1_reg_imm_size_float32(jit_gencode_t gen, unsigned char **inst_ptr,
			       X86_64_XMM1_OP opc, int reg,
			       jit_float32 *float32_value)
{
	void *ptr;
	jit_nint offset;
	unsigned char *inst;

	inst = *inst_ptr;
	ptr = _jit_gen_alloc(gen, sizeof(jit_float32));
	if(!ptr)
	{
		return 0;
	}
	jit_memcpy(ptr, float32_value, sizeof(jit_float32));

	offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8));
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		x86_64_xmm1_reg_membase(inst, opc, reg,
					X86_64_RIP, offset, 0);
	}
	else if(((jit_nint)ptr >= jit_min_int) &&
		((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		x86_64_xmm1_reg_mem(inst, opc, reg, (jit_nint)ptr, 0);
	}
	else
	{
		/* We have to use an extra general register */
		TODO();
		return 0;
	}
	*inst_ptr = inst;
	return 1;
}

/*
 * Do an xmm operation with a constant float64 value
 */
static int
_jit_xmm1_reg_imm_size_float64(jit_gencode_t gen, unsigned char **inst_ptr,
			       X86_64_XMM1_OP opc, int reg,
			       jit_float64 *float64_value)
{
	void *ptr;
	jit_nint offset;
	unsigned char *inst;

	inst = *inst_ptr;
	ptr = _jit_gen_alloc(gen, sizeof(jit_float64));
	if(!ptr)
	{
		return 0;
	}
	jit_memcpy(ptr, float64_value, sizeof(jit_float64));

	offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8));
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		x86_64_xmm1_reg_membase(inst, opc, reg,
					X86_64_RIP, offset, 1);
	}
	else if(((jit_nint)ptr >= jit_min_int) &&
		((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		x86_64_xmm1_reg_mem(inst, opc, reg, (jit_nint)ptr, 1);
	}
	else
	{
		/* We have to use an extra general register */
		TODO();
		return 0;
	}
	*inst_ptr = inst;
	return 1;
}

/*
 * Do a logical xmm operation with packed float32 values
 */
static int
_jit_plops_reg_imm(jit_gencode_t gen, unsigned char **inst_ptr,
		   X86_64_XMM_PLOP opc, int reg, void *packed_value)
{
	void *ptr;
	jit_nint offset;
	unsigned char *inst;

	inst = *inst_ptr;
	ptr = _jit_gen_alloc(gen, 16);
	if(!ptr)
	{
		return 0;
	}
	jit_memcpy(ptr, packed_value, 16);

	/* calculate the offset for membase addressing */
	offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 8 : 7));
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		x86_64_plops_reg_membase(inst, opc, reg, X86_64_RIP, offset);
		*inst_ptr = inst;
		return 1;
	}
	/* Check if mem addressing can be used */
	if(((jit_nint)ptr >= jit_min_int) &&
	   ((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		x86_64_plops_reg_mem(inst, opc, reg, (jit_nint)ptr);
		*inst_ptr = inst;
		return 1;
	}
	/* We have to use an extra general register */
	TODO();
	return 0;
}
/*
 * Do a logical xmm operation with packed float64 values
 */
static int
_jit_plopd_reg_imm(jit_gencode_t gen, unsigned char **inst_ptr,
		   X86_64_XMM_PLOP opc, int reg, void *packed_value)
{
	void *ptr;
	jit_nint offset;
	unsigned char *inst;

	inst = *inst_ptr;
	ptr = _jit_gen_alloc(gen, 16);
	if(!ptr)
	{
		return 0;
	}
	jit_memcpy(ptr, packed_value, 16);

	/* calculate the offset for membase addressing */
	offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8));
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		x86_64_plopd_reg_membase(inst, opc, reg, X86_64_RIP, offset);
		*inst_ptr = inst;
		return 1;
	}
	/* Check if mem addressing can be used */
	if(((jit_nint)ptr >= jit_min_int) &&
	   ((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		x86_64_plopd_reg_mem(inst, opc, reg, (jit_nint)ptr);
		*inst_ptr = inst;
		return 1;
	}
	/* We have to use an extra general register */
	TODO();
	return 0;
}

/*
 * Helpers for saving and setting roundmode in the fpu control word
 * and restoring it afterwards.
 * The rounding mode bits are bit 10 and 11 in the fpu control word.
 * sp_offset is the start offset of a temporary eight byte block.
 */
static unsigned char *
_x86_64_set_fpu_roundmode(unsigned char *inst, int scratch_reg,
			  int sp_offset, X86_64_ROUNDMODE mode)
{
	int fpcw_save_offset = sp_offset + 4;
	int fpcw_new_offset = sp_offset;
	int round_mode = ((int)mode) << 10;
	int round_mode_mask = ~(((int)X86_ROUND_ZERO) << 10);

	/* store FPU control word */
	x86_64_fnstcw_membase(inst, X86_64_RSP, fpcw_save_offset);
	/* load the value into the scratch register */
	x86_64_mov_reg_membase_size(inst, scratch_reg, X86_64_RSP, fpcw_save_offset, 2);
	/* Set the rounding mode */
	if(mode != X86_ROUND_ZERO)
	{
		/* Not all bits are set in the mask so we have to clear it first */
		x86_64_and_reg_imm_size(inst, scratch_reg, round_mode_mask, 2);
	}
	x86_64_or_reg_imm_size(inst, scratch_reg, round_mode, 2);
	/* Store the new round mode */
	x86_64_mov_membase_reg_size(inst, X86_64_RSP, fpcw_new_offset, scratch_reg, 2);
	/* Now load the new control word */
	x86_64_fldcw_membase(inst, X86_64_RSP, fpcw_new_offset);

	return inst;
}

static unsigned char *
_x86_64_restore_fpcw(unsigned char *inst, int sp_offset)
{
	int fpcw_save_offset = sp_offset + 4;

	/* Now load the saved control word */
	x86_64_fldcw_membase(inst, X86_64_RSP, fpcw_save_offset);

	return inst;
}
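/*
 * Worked example (illustration only, assuming the x87 RC encoding with
 * X86_ROUND_ZERO == 3): starting from the default control word 0x037f,
 * selecting truncation needs only the OR step,
 *
 *     0x037f | (3 << 10) == 0x0f7f
 *
 * while any other mode first clears bits 10-11 with ~(3 << 10) before
 * OR-ing its own pattern in, which is exactly the conditional AND in
 * _x86_64_set_fpu_roundmode() above.
 */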
/*
 * Helpers for saving and setting roundmode in the mxcsr register and
 * restoring it afterwards.
 * The rounding mode bits are bit 13 and 14 in the mxcsr register.
 * sp_offset is the start offset of a temporary eight byte block.
 */
static unsigned char *
_x86_64_set_xmm_roundmode(unsigned char *inst, int scratch_reg,
			  int sp_offset, X86_64_ROUNDMODE mode)
{
	int mxcsr_save_offset = sp_offset + 4;
	int mxcsr_new_offset = sp_offset;
	int round_mode = ((int)mode) << 13;
	int round_mode_mask = ~(((int)X86_ROUND_ZERO) << 13);

	/* save the mxcsr register */
	x86_64_stmxcsr_membase(inst, X86_64_RSP, mxcsr_save_offset);
	/* Load the contents of the mxcsr register into the scratch register */
	x86_64_mov_reg_membase_size(inst, scratch_reg, X86_64_RSP, mxcsr_save_offset, 4);
	/* Set the rounding mode */
	if(mode != X86_ROUND_ZERO)
	{
		/* Not all bits are set in the mask so we have to clear it first */
		x86_64_and_reg_imm_size(inst, scratch_reg, round_mode_mask, 4);
	}
	x86_64_or_reg_imm_size(inst, scratch_reg, round_mode, 4);
	/* Store the new round mode */
	x86_64_mov_membase_reg_size(inst, X86_64_RSP, mxcsr_new_offset, scratch_reg, 4);
	/* and load it to the mxcsr register */
	x86_64_ldmxcsr_membase(inst, X86_64_RSP, mxcsr_new_offset);

	return inst;
}

static unsigned char *
_x86_64_restore_mxcsr(unsigned char *inst, int sp_offset)
{
	int mxcsr_save_offset = sp_offset + 4;

	/* restore the mxcsr register */
	x86_64_ldmxcsr_membase(inst, X86_64_RSP, mxcsr_save_offset);

	return inst;
}
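/*
 * Note (illustration, not part of the original source): with
 * HAVE_RED_ZONE the rounding helpers below address temporaries at
 * negative offsets from %rsp, e.g.
 *
 *     movss %xmm1, -16(%rsp)    ; no sub/add of %rsp required
 *
 * because the SysV red zone guarantees that the 128 bytes below the
 * stack pointer survive interrupts and signal handlers.
 */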
/*
 * Perform rounding of scalar single precision values.
 * We have to use the fpu where SSE4.1 is not supported.
 */
static unsigned char *
x86_64_rounds_reg_reg(unsigned char *inst, int dreg, int sreg,
		      int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundss_reg_reg(inst, dreg, sreg, mode);
#else
	/* Copy the xmm register to the stack */
	x86_64_movss_membase_reg(inst, X86_64_RSP, -16, sreg);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RSP, -16, 4);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and move st(0) to the destination register */
	x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 4);
	x86_64_movss_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundss_reg_reg(inst, dreg, sreg, mode);
#else
	/* allocate space on the stack for two ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Copy the xmm register to the stack */
	x86_64_movss_regp_reg(inst, X86_64_RSP, sreg);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
	/* Load the value to the fpu */
	x86_64_fld_regp_size(inst, X86_64_RSP, 4);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and move st(0) to the destination register */
	x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
	x86_64_movss_reg_regp(inst, dreg, X86_64_RSP);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}

static unsigned char *
x86_64_rounds_reg_membase(unsigned char *inst, int dreg, int offset,
			  int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundss_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and move st(0) to the destination register */
	x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 4);
	x86_64_movss_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundss_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
	/* allocate space on the stack for two ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and move st(0) to the destination register */
	x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
	x86_64_movss_reg_regp(inst, dreg, X86_64_RSP);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}
/*
 * Perform rounding of scalar double precision values.
 * We have to use the fpu where SSE4.1 is not supported.
 */
static unsigned char *
x86_64_roundd_reg_reg(unsigned char *inst, int dreg, int sreg,
		      int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundsd_reg_reg(inst, dreg, sreg, mode);
#else
	/* Copy the xmm register to the stack */
	x86_64_movsd_membase_reg(inst, X86_64_RSP, -16, sreg);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RSP, -16, 8);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and move st(0) to the destination register */
	x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 8);
	x86_64_movsd_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundsd_reg_reg(inst, dreg, sreg, mode);
#else
	/* allocate space on the stack for two ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Copy the xmm register to the stack */
	x86_64_movsd_regp_reg(inst, X86_64_RSP, sreg);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
	/* Load the value to the fpu */
	x86_64_fld_regp_size(inst, X86_64_RSP, 8);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and move st(0) to the destination register */
	x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
	x86_64_movsd_reg_regp(inst, dreg, X86_64_RSP);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}

static unsigned char *
x86_64_roundd_reg_membase(unsigned char *inst, int dreg, int offset,
			  int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundsd_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and move st(0) to the destination register */
	x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 8);
	x86_64_movsd_reg_membase(inst, dreg, X86_64_RSP, -16);
#endif
#else
#ifdef HAVE_X86_SSE_4_1
	x86_64_roundsd_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
#else
	/* allocate space on the stack for two ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Load the value to the fpu */
	x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and move st(0) to the destination register */
	x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
	x86_64_movsd_reg_regp(inst, dreg, X86_64_RSP);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}

/*
 * Round the value in st(0) to integer according to the rounding
 * mode specified.
 */
static unsigned char *
x86_64_roundnf(unsigned char *inst, int scratch_reg, X86_64_ROUNDMODE mode)
{
#ifdef HAVE_RED_ZONE
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
#else
	/* allocate space on the stack for two ints */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 0, mode);
	/* And round it to integer */
	x86_64_frndint(inst);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 0);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#endif
	return inst;
}
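/*
 * Note (illustration, not part of the original source): fistp converts
 * using the rounding mode from the current fpu control word, which is
 * why x86_64_nfloat_to_int() below forces X86_ROUND_ZERO first to get
 * C truncation semantics.  The SSE3 fisttp instruction always
 * truncates, so the control word save/restore sequence can be skipped
 * entirely when HAVE_X86_FISTTP is available.
 */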
/*
 * Round the value in the fpu register st(0) to integer and
 * store the value in dreg.  st(0) is popped from the fpu stack.
 */
static unsigned char *
x86_64_nfloat_to_int(unsigned char *inst, int dreg, int scratch_reg, int size)
{
#ifdef HAVE_RED_ZONE
#ifdef HAVE_X86_FISTTP
	/* convert float to int */
	x86_64_fisttp_membase_size(inst, X86_64_RSP, -8, 4);
	/* move result to the destination */
	x86_64_mov_reg_membase_size(inst, dreg, X86_64_RSP, -8, 4);
#else
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, X86_ROUND_ZERO);
	/* And round the value in st(0) to integer and store it on the stack */
	x86_64_fistp_membase_size(inst, X86_64_RSP, -16, size);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, -8);
	/* and load the integer to the destination register */
	x86_64_mov_reg_membase_size(inst, dreg, X86_64_RSP, -16, size);
#endif
#else
#ifdef HAVE_X86_FISTTP
	/* allocate space on the stack for one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
	/* convert float to int */
	x86_64_fisttp_regp_size(inst, X86_64_RSP, 4);
	/* move result to the destination */
	x86_64_mov_reg_regp_size(inst, dreg, X86_64_RSP, 4);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
#else
	/* allocate space on the stack for 2 ints and one long value */
	x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
	/* Set the fpu round mode */
	inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, X86_ROUND_ZERO);
	/* And round the value in st(0) to integer and store it on the stack */
	x86_64_fistp_regp_size(inst, X86_64_RSP, size);
	/* restore the fpu control word */
	inst = _x86_64_restore_fpcw(inst, 8);
	/* and load the integer to the destination register */
	x86_64_mov_reg_regp_size(inst, dreg, X86_64_RSP, size);
	/* restore the stack pointer */
	x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
#endif
#endif
	return inst;
}

/*
 * Call a function
 */
static unsigned char *
x86_64_call_code(unsigned char *inst, jit_nint func)
{
	jit_nint offset;

	x86_64_mov_reg_imm_size(inst, X86_64_RAX, 8, 4);
	offset = func - ((jit_nint)inst + 5);
	if(offset >= jit_min_int && offset <= jit_max_int)
	{
		/* We can use the immediate call */
		x86_64_call_imm(inst, offset);
	}
	else
	{
		/* We have to do a call via register */
		x86_64_mov_reg_imm_size(inst, X86_64_SCRATCH, func, 8);
		x86_64_call_reg(inst, X86_64_SCRATCH);
	}
	return inst;
}

/*
 * Jump to a function
 */
static unsigned char *
x86_64_jump_to_code(unsigned char *inst, jit_nint func)
{
	jit_nint offset;

	offset = func - ((jit_nint)inst + 5);
	if(offset >= jit_min_int && offset <= jit_max_int)
	{
		/* We can use the immediate jump */
		x86_64_jmp_imm(inst, offset);
	}
	else
	{
		/* We have to do a jump via register */
		x86_64_mov_reg_imm_size(inst, X86_64_SCRATCH, func, 8);
		x86_64_jmp_reg(inst, X86_64_SCRATCH);
	}
	return inst;
}
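/*
 * Illustrative note (not part of the original source): a direct
 * "call rel32" (0xE8) is five bytes and its displacement is relative
 * to the end of the instruction, hence the
 *
 *     offset = func - ((jit_nint)inst + 5);
 *
 * in the helpers above.  Only targets within +/-2GB are reachable that
 * way; otherwise the full 64 bit address is loaded into X86_64_SCRATCH
 * and an indirect call or jump is emitted instead.
 */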
/*
 * Throw a builtin exception.
 */
static unsigned char *
throw_builtin(unsigned char *inst, jit_function_t func, int type)
{
	/* We need to update "catch_pc" if we have a "try" block */
	if(func->builder->setjmp_value != 0)
	{
		_jit_gen_fix_value(func->builder->setjmp_value);

		x86_64_lea_membase_size(inst, X86_64_RDI, X86_64_RIP, 0, 8);
		x86_64_mov_membase_reg_size(inst, X86_64_RBP,
					    func->builder->setjmp_value->frame_offset
					    + jit_jmp_catch_pc_offset, X86_64_RDI, 8);
	}

	/* Load the exception type into the first argument register */
	x86_64_mov_reg_imm_size(inst, X86_64_RDI, type, 4);

	/* Call the "jit_exception_builtin" function, which will never return */
	return x86_64_call_code(inst, (jit_nint)jit_exception_builtin);
}
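/*
 * Note (illustration, not part of the original source): the
 * "lea 0(%rip), %rdi" emitted above materializes the address of the
 * instruction that follows it, i.e. the program counter at the throw
 * site.  Storing that into the setjmp buffer's catch_pc slot lets the
 * exception dispatch locate the handler for the enclosing "try" block.
 */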
/*
 * spill a register to its place in the current stack frame.
 * The argument type must be in its normalized form.
 */
static void
_spill_reg(unsigned char **inst_ptr, jit_type_t type,
	   jit_int reg, jit_int offset)
{
	unsigned char *inst = *inst_ptr;

	if(IS_GENERAL_REG(reg))
	{
		switch(type->kind)
		{
#if 0
			case JIT_TYPE_SBYTE:
			case JIT_TYPE_UBYTE:
			{
				x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
							    _jit_reg_info[reg].cpu_reg, 1);
			}
			break;

			case JIT_TYPE_SHORT:
			case JIT_TYPE_USHORT:
			{
				x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
							    _jit_reg_info[reg].cpu_reg, 2);
			}
			break;
#else
			case JIT_TYPE_SBYTE:
			case JIT_TYPE_UBYTE:
			case JIT_TYPE_SHORT:
			case JIT_TYPE_USHORT:
#endif
			case JIT_TYPE_INT:
			case JIT_TYPE_UINT:
			case JIT_TYPE_FLOAT32:
			{
				x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
							    _jit_reg_info[reg].cpu_reg, 4);
			}
			break;

			case JIT_TYPE_LONG:
			case JIT_TYPE_ULONG:
			case JIT_TYPE_FLOAT64:
			{
				x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
							    _jit_reg_info[reg].cpu_reg, 8);
			}
			break;

			case JIT_TYPE_STRUCT:
			case JIT_TYPE_UNION:
			{
				jit_nuint size = jit_type_get_size(type);

				if(size == 1)
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
								    _jit_reg_info[reg].cpu_reg, 1);
				}
				else if(size == 2)
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
								    _jit_reg_info[reg].cpu_reg, 2);
				}
				else if(size <= 4)
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
								    _jit_reg_info[reg].cpu_reg, 4);
				}
				else
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
								    _jit_reg_info[reg].cpu_reg, 8);
				}
			}
		}
	}
	else if(IS_XMM_REG(reg))
	{
		switch(type->kind)
		{
			case JIT_TYPE_FLOAT32:
			{
				x86_64_movss_membase_reg(inst, X86_64_RBP, offset,
							 _jit_reg_info[reg].cpu_reg);
			}
			break;

			case JIT_TYPE_FLOAT64:
			{
				x86_64_movsd_membase_reg(inst, X86_64_RBP, offset,
							 _jit_reg_info[reg].cpu_reg);
			}
			break;

			case JIT_TYPE_STRUCT:
			case JIT_TYPE_UNION:
			{
				jit_nuint size = jit_type_get_size(type);

				if(size <= 4)
				{
					x86_64_movss_membase_reg(inst, X86_64_RBP, offset,
								 _jit_reg_info[reg].cpu_reg);
				}
				else if(size <= 8)
				{
					x86_64_movsd_membase_reg(inst, X86_64_RBP, offset,
								 _jit_reg_info[reg].cpu_reg);
				}
				else
				{
					jit_nint alignment = jit_type_get_alignment(type);

					if((alignment & 0xf) == 0)
					{
						x86_64_movaps_membase_reg(inst, X86_64_RBP, offset,
									  _jit_reg_info[reg].cpu_reg);
					}
					else
					{
						x86_64_movups_membase_reg(inst, X86_64_RBP, offset,
									  _jit_reg_info[reg].cpu_reg);
					}
				}
			}
			break;
		}
	}
	else if(IS_FPU_REG(reg))
	{
		switch(type->kind)
		{
			case JIT_TYPE_FLOAT32:
			{
				x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 4);
			}
			break;

			case JIT_TYPE_FLOAT64:
			{
				x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8);
			}
			break;

			case JIT_TYPE_NFLOAT:
			{
				if(sizeof(jit_nfloat) == sizeof(jit_float64))
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8);
				}
				else
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 10);
				}
			}
			break;
		}
	}

	/* Write the current instruction pointer back */
	*inst_ptr = inst;
}

void
_jit_gen_fix_value(jit_value_t value)
{
	if(!(value->has_frame_offset) && !(value->is_constant))
	{
		jit_nuint alignment = jit_type_get_alignment(value->type);
		jit_nint size = jit_type_get_size(value->type);
		jit_nint frame_size = value->block->func->builder->frame_size;

		/* Round the size to a multiple of the stack item size */
		size = (jit_nint)(ROUND_STACK(size));

		/* Add the size to the existing local items */
		frame_size += size;

		/* Align the new frame_size for the value */
		frame_size = (frame_size + (alignment - 1)) & ~(alignment - 1);

		value->block->func->builder->frame_size = frame_size;
		value->frame_offset = -frame_size;
		value->has_frame_offset = 1;
	}
}

void
_jit_gen_spill_global(jit_gencode_t gen, int reg, jit_value_t value)
{
	jit_cache_setup_output(16);
	if(value)
	{
		jit_type_t type = jit_type_normalize(value->type);

		_jit_gen_fix_value(value);

		_spill_reg(&inst, type, value->global_reg, value->frame_offset);
	}
	else
	{
		x86_64_push_reg_size(inst, _jit_reg_info[reg].cpu_reg, 8);
	}
	jit_cache_end_output();
}

void
_jit_gen_load_global(jit_gencode_t gen, int reg, jit_value_t value)
{
	jit_cache_setup_output(16);
	if(value)
	{
		x86_64_mov_reg_membase_size(inst,
					    _jit_reg_info[value->global_reg].cpu_reg,
					    X86_64_RBP, value->frame_offset, 8);
	}
	else
	{
		x86_64_pop_reg_size(inst, _jit_reg_info[reg].cpu_reg, 8);
	}
	jit_cache_end_output();
}

void
_jit_gen_spill_reg(jit_gencode_t gen, int reg,
		   int other_reg, jit_value_t value)
{
	jit_type_t type;

	/* Make sure that we have sufficient space */
	jit_cache_setup_output(16);

	/* If the value is associated with a global register, then copy to that */
	if(value->has_global_register)
	{
		reg = _jit_reg_info[reg].cpu_reg;
		other_reg = _jit_reg_info[value->global_reg].cpu_reg;
		x86_64_mov_reg_reg_size(inst, other_reg, reg, sizeof(void *));
		jit_cache_end_output();
		return;
	}

	/* Fix the value in place within the local variable frame */
	_jit_gen_fix_value(value);

	/* Get the normalized type */
	type = jit_type_normalize(value->type);

	/* and spill the register */
	_spill_reg(&inst, type, reg, value->frame_offset);

	/* End the code output process */
	jit_cache_end_output();
}
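/*
 * Worked example for _jit_gen_fix_value() above (illustration only,
 * assuming ROUND_STACK rounds up to the 8 byte stack item size): with
 * frame_size == 8 already used, fixing a jit_float64 value (size 8,
 * alignment 8) grows frame_size to 16, so the value is addressed as
 * -16(%rbp).  A following jit_int (size 4, rounded up to 8) then lands
 * at -24(%rbp).
 */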
void
_jit_gen_free_reg(jit_gencode_t gen, int reg,
		  int other_reg, int value_used)
{
	/* We only need to take explicit action if we are freeing a
	   floating-point register whose value hasn't been used yet */
	if(!value_used && IS_FPU_REG(reg))
	{
		_jit_gen_check_space(gen, 2);
		x86_fstp(gen->ptr, reg - X86_64_REG_ST0);
	}
}

/*
 * Set a register value based on a condition code.
 */
static unsigned char *
setcc_reg(unsigned char *inst, int reg, int cond, int is_signed)
{
	/* Use a SETcc instruction if we have a basic register */
	x86_64_set_reg(inst, cond, reg, is_signed);
	x86_64_movzx8_reg_reg_size(inst, reg, reg, 4);
	return inst;
}

/*
 * Helper macros for fixup handling.
 *
 * We have only 4 bytes for the jump offsets.
 * Therefore we have to do something tricky here.
 * The fixup pointer in the block/gen points to the last fixup.
 * The fixup itself contains the offset to the previous fixup or
 * null if it's the last fixup in the list.
 */

/*
 * Calculate the fixup value
 * This is the value stored as placeholder in the instruction.
 */
#define _JIT_CALC_FIXUP(fixup_list, inst) \
	((jit_int)((jit_nint)(inst) - (jit_nint)(fixup_list)))

/*
 * Calculate the pointer to the fixup value.
 */
#define _JIT_CALC_NEXT_FIXUP(fixup_list, fixup) \
	((fixup) ? ((jit_nint)(fixup_list) - (jit_nint)(fixup)) : (jit_nint)0)
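/*
 * Sketch of how such a chain is resolved once the target address is
 * known (illustration only; "target" is hypothetical here).  Each
 * placeholder holds the distance to the previous fixup, so the chain
 * is walked backwards from the list head until a zero offset ends it.
 */
#if 0
	unsigned char *fixup = (unsigned char *)block->fixup_list;
	while(fixup)
	{
		jit_int prev_offset = *(jit_int *)fixup;
		unsigned char *patch = fixup;

		/* Patch the rel32 placeholder so it reaches the target */
		x86_imm_emit32(patch, target - (fixup + 4));
		fixup = prev_offset ? fixup - prev_offset : 0;
	}
#endif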
/*
 * Get the long form of a branch opcode.
 */
static int
long_form_branch(int opcode)
{
	if(opcode == 0xEB)
	{
		return 0xE9;
	}
	else
	{
		return opcode + 0x0F10;
	}
}

/*
 * Output a branch instruction.
 */
static unsigned char *
output_branch(jit_function_t func, unsigned char *inst, int opcode,
	      jit_insn_t insn)
{
	jit_block_t block;

	if((insn->flags & JIT_INSN_VALUE1_IS_LABEL) != 0)
	{
		/* "address_of_label" instruction */
		block = jit_block_from_label(func, (jit_label_t)(insn->value1));
	}
	else
	{
		block = jit_block_from_label(func, (jit_label_t)(insn->dest));
	}
	if(!block)
	{
		return inst;
	}
	if(block->address)
	{
		jit_nint offset;

		/* We already know the address of the block */
		offset = ((unsigned char *)(block->address)) - (inst + 2);
		if(x86_is_imm8(offset))
		{
			/* We can output a short-form backwards branch */
			*inst++ = (unsigned char)opcode;
			*inst++ = (unsigned char)offset;
		}
		else
		{
			/* We need to output a long-form backwards branch */
			offset -= 3;
			opcode = long_form_branch(opcode);
			if(opcode < 256)
			{
				*inst++ = (unsigned char)opcode;
			}
			else
			{
				*inst++ = (unsigned char)(opcode >> 8);
				*inst++ = (unsigned char)opcode;
				--offset;
			}
			x86_imm_emit32(inst, offset);
		}
	}
	else
	{
		jit_int fixup;

		/* Output a placeholder and record on the block's fixup list */
		opcode = long_form_branch(opcode);
		if(opcode < 256)
		{
			*inst++ = (unsigned char)opcode;
		}
		else
		{
			*inst++ = (unsigned char)(opcode >> 8);
			*inst++ = (unsigned char)opcode;
		}
		if(block->fixup_list)
		{
			fixup = _JIT_CALC_FIXUP(block->fixup_list, inst);
		}
		else
		{
			fixup = 0;
		}
		block->fixup_list = (void *)inst;
		x86_imm_emit32(inst, fixup);

		if(DEBUG_FIXUPS)
		{
			fprintf(stderr,
				"Block: %lx, Current Fixup: %lx, Next fixup: %lx\n",
				(jit_nint)block, (jit_nint)(block->fixup_list),
				(jit_nint)fixup);
		}
	}
	return inst;
}

/*
 * Jump to the current function's epilog.
 */
static unsigned char *
jump_to_epilog(jit_gencode_t gen, unsigned char *inst, jit_block_t block)
{
	jit_int fixup;

	/* If the epilog is the next thing that we will output,
	   then fall through to the epilog directly */
	if(_jit_block_is_final(block))
	{
		return inst;
	}

	/* Output a placeholder for the jump and add it to the fixup list */
	*inst++ = (unsigned char)0xE9;
	if(gen->epilog_fixup)
	{
		fixup = _JIT_CALC_FIXUP(gen->epilog_fixup, inst);
	}
	else
	{
		fixup = 0;
	}
	gen->epilog_fixup = (void *)inst;
	x86_imm_emit32(inst, fixup);
	return inst;
}

/*
 * fixup a register being alloca'd to by accounting for the param area
 */
static unsigned char *
fixup_alloca(jit_gencode_t gen, unsigned char *inst, int reg)
{
#ifdef JIT_USE_PARAM_AREA
	jit_int fixup;
	jit_int temp;

	/*
	 * emit the instruction and then replace the imm section of op with
	 * the fixup.
	 * NOTE: We are using the temp variable here to avoid a compiler
	 * warning and the temp value to make sure that an instruction with
	 * a 32 bit immediate is emitted.  The temp value in the instruction
	 * will be replaced by the fixup
	 */
	temp = 1234567;
	x86_64_add_reg_imm_size(inst, reg, temp, 8);

	/* Make inst point to the 32bit immediate in the instruction */
	inst -= 4;

	/* calculate the fixup */
	if(gen->alloca_fixup)
	{
		fixup = _JIT_CALC_FIXUP(gen->alloca_fixup, inst);
	}
	else
	{
		fixup = 0;
	}
	gen->alloca_fixup = (void *)inst;
	x86_imm_emit32(inst, fixup);
#else /* !JIT_USE_PARAM_AREA */
	/* alloca fixup is not needed if the param area is not used */
#endif /* JIT_USE_PARAM_AREA */
	return inst;
}
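/*
 * Worked example (illustration only): the short conditional branches
 * are the one byte opcodes 0x70..0x7F (jcc rel8) and their long forms
 * are the two byte opcodes 0x0F 0x80..0x8F (jcc rel32).  Adding 0x0F10
 * in long_form_branch() therefore maps e.g. 0x74 (je rel8) to 0x0F84
 * (je rel32), while the unconditional 0xEB (jmp rel8) needs the
 * special case 0xE9 (jmp rel32).
 */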
/*
 * Compare an xmm register with an immediate value.
 */
static unsigned char *
xmm_cmp_reg_imm(jit_gencode_t gen, unsigned char *inst, int xreg, void *imm,
		int is_double)
{
	int inst_len = 7 + (is_double ? 1 : 0) + (xreg > 7 ? 1 : 0);
	void *ptr;
	jit_nint offset;

	if(is_double)
	{
		ptr = _jit_gen_alloc(gen, sizeof(jit_float64));
		if(!ptr)
		{
			return 0;
		}
		jit_memcpy(ptr, imm, sizeof(jit_float64));
	}
	else
	{
		ptr = _jit_gen_alloc(gen, sizeof(jit_float32));
		if(!ptr)
		{
			return 0;
		}
		jit_memcpy(ptr, imm, sizeof(jit_float32));
	}
	offset = (jit_nint)ptr - ((jit_nint)inst + inst_len);
	if((offset >= jit_min_int) && (offset <= jit_max_int))
	{
		/* We can use RIP relative addressing here */
		if(is_double)
		{
			x86_64_ucomisd_reg_membase(inst, xreg, X86_64_RIP, offset);
		}
		else
		{
			x86_64_ucomiss_reg_membase(inst, xreg, X86_64_RIP, offset);
		}
	}
	else if(((jit_nint)ptr >= jit_min_int) &&
		((jit_nint)ptr <= jit_max_int))
	{
		/* We can use absolute addressing */
		if(is_double)
		{
			x86_64_ucomisd_reg_mem(inst, xreg, (jit_nint)ptr);
		}
		else
		{
			x86_64_ucomiss_reg_mem(inst, xreg, (jit_nint)ptr);
		}
	}
	else
	{
		/* We have to use an extra general register */
		TODO();
		return 0;
	}
	return inst;
}

/*
 * Compare two scalar float or double values and set dreg depending on the
 * flags set.
 * The result for nan values depends on nan_result.
 * If nan_result is == 0 then the result is 0 if any nan value is involved,
 * otherwise the result is true.
 */
static unsigned char *
xmm_setcc(unsigned char *inst, int dreg, int cond, int sreg, int nan_result)
{
	x86_64_set_reg(inst, cond, dreg, 0);
	if(nan_result)
	{
		/*
		 * Check pf only for comparisons where a flag is checked
		 * for 0 because an unordered result sets all flags.
		 * The cases where the additional check is not needed are
		 * eq, lt and le.
		 */
		if((cond != 0) && (cond != 2) && (cond != 3))
		{
			x86_64_set_reg(inst, 8 /* p */, sreg, 0);
			x86_64_or_reg_reg_size(inst, dreg, sreg, 4);
		}
	}
	else
	{
		/*
		 * Check pf only for comparisons where a flag is checked
		 * for 1 because an unordered result sets all flags.
		 * The cases where the additional check is not needed are
		 * ne, gt and ge.
		 */
		if((cond != 1) && (cond != 4) && (cond != 5))
		{
			x86_64_set_reg(inst, 9 /* np */, sreg, 0);
			x86_64_and_reg_reg_size(inst, dreg, sreg, 4);
		}
	}
	x86_64_movzx8_reg_reg_size(inst, dreg, dreg, 4);
	return inst;
}

static unsigned char *
xmm_cmp_setcc_reg_imm(jit_gencode_t gen, unsigned char *inst, int dreg,
		      int cond, int xreg, void *imm, int sreg, int is_double,
		      int nan_result)
{
	inst = xmm_cmp_reg_imm(gen, inst, xreg, imm, is_double);
	return xmm_setcc(inst, dreg, cond, sreg, nan_result);
}

static unsigned char *
xmm_cmp_setcc_reg_reg(unsigned char *inst, int dreg, int cond, int xreg1,
		      int xreg2, int sreg, int is_double, int nan_result)
{
	if(is_double)
	{
		x86_64_ucomisd_reg_reg(inst, xreg1, xreg2);
	}
	else
	{
		x86_64_ucomiss_reg_reg(inst, xreg1, xreg2);
	}
	return xmm_setcc(inst, dreg, cond, sreg, nan_result);
}
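/*
 * Note (illustration, not part of the original source): ucomiss and
 * ucomisd signal an unordered comparison (a NaN operand) by setting
 * ZF, PF and CF all to 1.  An equality test with nan_result == 0
 * therefore needs the extra parity check that xmm_setcc() emits above:
 *
 *     sete  %al      ; ZF == 1 also holds for unordered operands
 *     setnp %cl      ; PF == 0 rules the NaN case out
 *     and   %ecx, %eax
 */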
/*
 * Compare two float values and branch depending on the flags.
 */
static unsigned char *
xmm_brcc(jit_function_t func, unsigned char *inst, int cond, int nan_result,
	 jit_insn_t insn)
{
	if(nan_result)
	{
		/*
		 * Check pf only for comparisons where a flag is checked
		 * for 0 because an unordered result sets all flags.
		 * The cases where the additional check is not needed are
		 * eq, lt and le.
		 */
		if((cond != 0) && (cond != 2) && (cond != 3))
		{
			/* Branch if the parity flag is set */
			inst = output_branch(func, inst,
					     x86_cc_unsigned_map[8], insn);
		}
		inst = output_branch(func, inst, x86_cc_unsigned_map[cond], insn);
	}
	else
	{
		/*
		 * Check pf only for comparisons where a flag is checked
		 * for 1 because an unordered result sets all flags.
		 * The cases where the additional check is not needed are
		 * ne, gt and ge.
		 */
		if((cond != 1) && (cond != 4) && (cond != 5))
		{
			unsigned char *patch;
			patch = inst;
			x86_branch8(inst, X86_CC_P, 0, 0);
			inst = output_branch(func, inst,
					     x86_cc_unsigned_map[cond], insn);
			x86_patch(patch, inst);
		}
		else
		{
			inst = output_branch(func, inst,
					     x86_cc_unsigned_map[cond], insn);
		}
	}
	return inst;
}

static unsigned char *
xmm_cmp_brcc_reg_imm(jit_gencode_t gen, jit_function_t func,
		     unsigned char *inst, int cond, int xreg, void *imm,
		     int is_double, int nan_result, jit_insn_t insn)
{
	inst = xmm_cmp_reg_imm(gen, inst, xreg, imm, is_double);
	return xmm_brcc(func, inst, cond, nan_result, insn);
}

static unsigned char *
xmm_cmp_brcc_reg_reg(jit_function_t func, unsigned char *inst, int cond,
		     int xreg1, int xreg2, int is_double, int nan_result,
		     jit_insn_t insn)
{
	if(is_double)
	{
		x86_64_ucomisd_reg_reg(inst, xreg1, xreg2);
	}
	else
	{
		x86_64_ucomiss_reg_reg(inst, xreg1, xreg2);
	}
	return xmm_brcc(func, inst, cond, nan_result, insn);
}

static unsigned char *
xmm_cmp_brcc_reg_membase(jit_function_t func, unsigned char *inst, int cond,
			 int xreg1, int basereg, int offset, int is_double,
			 int nan_result, jit_insn_t insn)
{
	if(is_double)
	{
		x86_64_ucomisd_reg_membase(inst, xreg1, basereg, offset);
	}
	else
	{
		x86_64_ucomiss_reg_membase(inst, xreg1, basereg, offset);
	}
	return xmm_brcc(func, inst, cond, nan_result, insn);
}

/*
 * Support functions for the FPU stack
 */

static int
fp_stack_index(jit_gencode_t gen, int reg)
{
	return gen->reg_stack_top - reg - 1;
}

void
_jit_gen_exch_top(jit_gencode_t gen, int reg)
{
	if(IS_FPU_REG(reg))
	{
		jit_cache_setup_output(2);
		x86_fxch(inst, fp_stack_index(gen, reg));
		jit_cache_end_output();
	}
}

void
_jit_gen_move_top(jit_gencode_t gen, int reg)
{
	if(IS_FPU_REG(reg))
	{
		jit_cache_setup_output(2);
		x86_fstp(inst, fp_stack_index(gen, reg));
		jit_cache_end_output();
	}
}
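/*
 * Note (illustration, not part of the original source): the x87
 * registers form a stack, so a pseudo register can only be addressed
 * relative to the current top.  With two values live, the one pushed
 * first is st(1) and the one pushed last is st(0); fp_stack_index()
 * above converts the allocator's linear numbering into that
 * top-relative index.
 */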
void
_jit_gen_spill_top(jit_gencode_t gen, int reg, jit_value_t value, int pop)
{
	if(IS_FPU_REG(reg))
	{
		int offset;

		/* Make sure that we have sufficient space */
		jit_cache_setup_output(16);

		/* Fix the value in place within the local variable frame */
		_jit_gen_fix_value(value);

		/* Output an appropriate instruction to spill the value */
		offset = (int)(value->frame_offset);

		/* Spill the top of the floating-point register stack */
		switch(jit_type_normalize(value->type)->kind)
		{
			case JIT_TYPE_FLOAT32:
			{
				if(pop)
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 4);
				}
				else
				{
					x86_64_fst_membase_size(inst, X86_64_RBP, offset, 4);
				}
			}
			break;

			case JIT_TYPE_FLOAT64:
			{
				if(pop)
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8);
				}
				else
				{
					x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8);
				}
			}
			break;

			case JIT_TYPE_NFLOAT:
			{
				if(sizeof(jit_nfloat) == sizeof(jit_float64))
				{
					if(pop)
					{
						x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 8);
					}
					else
					{
						x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8);
					}
				}
				else
				{
					x86_64_fstp_membase_size(inst, X86_64_RBP, offset, 10);
					if(!pop)
					{
						x86_64_fld_membase_size(inst, X86_64_RBP, offset, 10);
					}
				}
			}
			break;
		}

		/* End the code output process */
		jit_cache_end_output();
	}
}

void
_jit_gen_load_value(jit_gencode_t gen, int reg, int other_reg, jit_value_t value)
{
	jit_type_t type;
	int src_reg;
	void *ptr;
	int offset;

	/* Make sure that we have sufficient space */
	jit_cache_setup_output(16);

	type = jit_type_normalize(value->type);

	/* Load a constant value */
	if(value->is_constant)
	{
		switch(type->kind)
		{
			case JIT_TYPE_SBYTE:
			case JIT_TYPE_UBYTE:
			case JIT_TYPE_SHORT:
			case JIT_TYPE_USHORT:
			case JIT_TYPE_INT:
			case JIT_TYPE_UINT:
			{
				if((jit_nint)(value->address) == 0)
				{
					x86_64_clear_reg(inst, _jit_reg_info[reg].cpu_reg);
				}
				else
				{
					x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
								(jit_nint)(value->address), 4);
				}
			}
			break;

			case JIT_TYPE_LONG:
			case JIT_TYPE_ULONG:
			{
				if((jit_nint)(value->address) == 0)
				{
					x86_64_clear_reg(inst, _jit_reg_info[reg].cpu_reg);
				}
				else
				{
					if((jit_nint)(value->address) > 0 && (jit_nint)(value->address) <= (jit_nint)jit_max_uint)
					{
						x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
									(jit_nint)(value->address), 4);
					}
					else
					{
						x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
									(jit_nint)(value->address), 8);
					}
				}
			}
			break;

			case JIT_TYPE_FLOAT32:
			{
				jit_float32 float32_value;

				float32_value = jit_value_get_float32_constant(value);

				if(IS_GENERAL_REG(reg))
				{
					union
					{
						jit_float32 float32_value;
						jit_int int_value;
					} un;

					un.float32_value = float32_value;
					x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
								un.int_value, 4);
				}
				else if(IS_XMM_REG(reg))
				{
					int xmm_reg = _jit_reg_info[reg].cpu_reg;

					if(float32_value == (jit_float32) 0.0)
					{
						x86_64_clear_xreg(inst, xmm_reg);
					}
					else
					{
						_jit_xmm1_reg_imm_size_float32(gen, &inst, XMM1_MOV,
									       xmm_reg, &float32_value);
					}
				}
				else
				{
					if(float32_value == (jit_float32) 0.0)
					{
						x86_fldz(inst);
					}
					else if(float32_value == (jit_float32) 1.0)
					{
						x86_fld1(inst);
					}
					else
					{
						jit_nint offset;

						ptr = _jit_gen_alloc(gen, sizeof(jit_float32));
						jit_memcpy(ptr, &float32_value, sizeof(float32_value));

						offset = (jit_nint)ptr - ((jit_nint)inst + 6);
						if((offset >= jit_min_int) && (offset <= jit_max_int))
						{
							/* We can use RIP relative addressing here */
							x86_64_fld_membase_size(inst, X86_64_RIP, offset, 4);
						}
						else if(((jit_nint)ptr >= jit_min_int) &&
							((jit_nint)ptr <= jit_max_int))
						{
							/* We can use absolute addressing */
							x86_64_fld_mem_size(inst, (jit_nint)ptr, 4);
						}
						else
						{
							/* We have to use an extra general register */
							TODO();
						}
					}
				}
			}
			break;

			case JIT_TYPE_FLOAT64:
			{
				jit_float64 float64_value;
				float64_value = jit_value_get_float64_constant(value);
				if(IS_GENERAL_REG(reg))
				{
					union
					{
						jit_float64 float64_value;
						jit_long long_value;
					} un;

					un.float64_value = float64_value;
					x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
								un.long_value, 8);
				}
				else if(IS_XMM_REG(reg))
				{
					int xmm_reg = _jit_reg_info[reg].cpu_reg;

					if(float64_value == (jit_float64) 0.0)
					{
						x86_64_clear_xreg(inst, xmm_reg);
					}
					else
					{
						_jit_xmm1_reg_imm_size_float64(gen, &inst, XMM1_MOV,
									       xmm_reg, &float64_value);
					}
				}
				else
				{
					if(float64_value == (jit_float64) 0.0)
					{
						x86_fldz(inst);
					}
					else if(float64_value == (jit_float64) 1.0)
					{
						x86_fld1(inst);
					}
					else
					{
						jit_nint offset;

						ptr = _jit_gen_alloc(gen, sizeof(jit_float64));
						jit_memcpy(ptr, &float64_value, sizeof(float64_value));

						offset = (jit_nint)ptr - ((jit_nint)inst + 6);
						if((offset >= jit_min_int) && (offset <= jit_max_int))
						{
							/* We can use RIP relative addressing here */
							x86_64_fld_membase_size(inst, X86_64_RIP, offset, 8);
						}
						else if(((jit_nint)ptr >= jit_min_int) &&
							((jit_nint)ptr <= jit_max_int))
						{
							/* We can use absolute addressing */
							x86_64_fld_mem_size(inst, (jit_nint)ptr, 8);
						}
						else
						{
							/* We have to use an extra general register */
							TODO();
						}
					}
				}
			}
			break;

			case JIT_TYPE_NFLOAT:
			{
				jit_nfloat nfloat_value;
				nfloat_value = jit_value_get_nfloat_constant(value);
				if(IS_GENERAL_REG(reg) && sizeof(jit_nfloat) == sizeof(jit_float64))
				{
					union
					{
						jit_nfloat nfloat_value;
						jit_long long_value;
					} un;

					un.nfloat_value = nfloat_value;
					x86_64_mov_reg_imm_size(inst, _jit_reg_info[reg].cpu_reg,
								un.long_value, 8);
				}
				else if(IS_XMM_REG(reg) && sizeof(jit_nfloat) == sizeof(jit_float64))
				{
					jit_nint offset;
					int xmm_reg = _jit_reg_info[reg].cpu_reg;

					ptr = _jit_gen_alloc(gen, sizeof(jit_nfloat));
					jit_memcpy(ptr, &nfloat_value, sizeof(nfloat_value));
					offset = (jit_nint)ptr -
						((jit_nint)inst + (xmm_reg > 7 ? 9 : 8));
					if((offset >= jit_min_int) && (offset <= jit_max_int))
					{
						/* We can use RIP relative addressing here */
						x86_64_movsd_reg_membase(inst, xmm_reg, X86_64_RIP, offset);
					}
					else if(((jit_nint)ptr >= jit_min_int) &&
						((jit_nint)ptr <= jit_max_int))
					{
						/* We can use absolute addressing */
						x86_64_movsd_reg_mem(inst, xmm_reg, (jit_nint)ptr);
					}
					else
					{
						/* We have to use an extra general register */
						TODO();
					}
				}
				else
				{
					if(nfloat_value == (jit_nfloat) 0.0)
					{
						x86_fldz(inst);
					}
					else if(nfloat_value == (jit_nfloat) 1.0)
					{
						x86_fld1(inst);
					}
					else
					{
						jit_nint offset;

						ptr = _jit_gen_alloc(gen, sizeof(jit_nfloat));
						jit_memcpy(ptr, &nfloat_value, sizeof(nfloat_value));

						offset = (jit_nint)ptr - ((jit_nint)inst + 6);
						if((offset >= jit_min_int) && (offset <= jit_max_int))
						{
							/* We can use RIP relative addressing here */
							if(sizeof(jit_nfloat) == sizeof(jit_float64))
							{
								x86_64_fld_membase_size(inst, X86_64_RIP, offset, 8);
							}
							else
							{
								x86_64_fld_membase_size(inst, X86_64_RIP, offset, 10);
							}
						}
						else if(((jit_nint)ptr >= jit_min_int) &&
							((jit_nint)ptr <= jit_max_int))
						{
							/* We can use absolute addressing */
							if(sizeof(jit_nfloat) == sizeof(jit_float64))
							{
								x86_64_fld_mem_size(inst, (jit_nint)ptr, 8);
							}
							else
							{
								x86_64_fld_mem_size(inst, (jit_nint)ptr, 10);
							}
						}
						else
						{
							/* We have to use an extra general register */
							TODO();
						}
					}
				}
			}
			break;
		}
	}
	else if(value->in_register || value->in_global_register)
	{
		if(value->in_register)
		{
			src_reg = value->reg;
		}
		else
		{
			src_reg = value->global_reg;
		}

		switch(type->kind)
		{
#if 0
			case JIT_TYPE_SBYTE:
			{
				x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg,
					      _jit_reg_info[src_reg].cpu_reg, 1, 0);
			}
			break;

			case JIT_TYPE_UBYTE:
			{
				x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg,
					      _jit_reg_info[src_reg].cpu_reg, 0, 0);
			}
			break;

			case JIT_TYPE_SHORT:
			{
				x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg,
					      _jit_reg_info[src_reg].cpu_reg, 1, 1);
			}
			break;

			case JIT_TYPE_USHORT:
			{
				x86_widen_reg(inst, _jit_reg_info[reg].cpu_reg,
					      _jit_reg_info[src_reg].cpu_reg, 0, 1);
			}
			break;
#else
			case JIT_TYPE_SBYTE:
			case JIT_TYPE_UBYTE:
			case JIT_TYPE_SHORT:
			case JIT_TYPE_USHORT:
#endif
			case JIT_TYPE_INT:
			case JIT_TYPE_UINT:
			{
				x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg,
							_jit_reg_info[src_reg].cpu_reg, 4);
			}
			break;

			case JIT_TYPE_LONG:
			case JIT_TYPE_ULONG:
			{
				x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg,
							_jit_reg_info[src_reg].cpu_reg, 8);
			}
			break;

			case JIT_TYPE_FLOAT32:
			{
				if(IS_FPU_REG(reg))
				{
					if(IS_FPU_REG(src_reg))
					{
						x86_fld_reg(inst, fp_stack_index(gen, src_reg));
					}
					else if(IS_XMM_REG(src_reg))
					{
						/* Fix the position of the value in the stack frame */
						_jit_gen_fix_value(value);
						offset = (int)(value->frame_offset);

						x86_64_movss_membase_reg(inst, X86_64_RBP, offset,
									 _jit_reg_info[src_reg].cpu_reg);
						x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
					}
				}
		else if(IS_XMM_REG(reg))
		{
			if(IS_FPU_REG(src_reg))
			{
				/* Fix the position of the value in the stack frame */
				_jit_gen_fix_value(value);
				offset = (int)(value->frame_offset);

				x86_64_fst_membase_size(inst, X86_64_RBP, offset, 4);
				x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
							 X86_64_RBP, offset);
			}
			else if(IS_XMM_REG(src_reg))
			{
				x86_64_movss_reg_reg(inst, _jit_reg_info[reg].cpu_reg,
						     _jit_reg_info[src_reg].cpu_reg);
			}
		}
	}
	break;

	case JIT_TYPE_FLOAT64:
	{
		if(IS_FPU_REG(reg))
		{
			if(IS_FPU_REG(src_reg))
			{
				x86_fld_reg(inst, fp_stack_index(gen, src_reg));
			}
			else if(IS_XMM_REG(src_reg))
			{
				/* Fix the position of the value in the stack frame */
				_jit_gen_fix_value(value);
				offset = (int)(value->frame_offset);

				x86_64_movsd_membase_reg(inst, X86_64_RBP, offset,
							 _jit_reg_info[src_reg].cpu_reg);
				x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
			}
		}
		else if(IS_XMM_REG(reg))
		{
			if(IS_FPU_REG(src_reg))
			{
				/* Fix the position of the value in the stack frame */
				_jit_gen_fix_value(value);
				offset = (int)(value->frame_offset);

				x86_64_fst_membase_size(inst, X86_64_RBP, offset, 8);
				x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
							 X86_64_RBP, offset);
			}
			else if(IS_XMM_REG(src_reg))
			{
				x86_64_movsd_reg_reg(inst, _jit_reg_info[reg].cpu_reg,
						     _jit_reg_info[src_reg].cpu_reg);
			}
		}
	}
	break;

	case JIT_TYPE_NFLOAT:
	{
		if(IS_FPU_REG(reg))
		{
			if(IS_FPU_REG(src_reg))
			{
				x86_fld_reg(inst, fp_stack_index(gen, src_reg));
			}
			else
			{
				fputs("Unsupported native float reg - reg move\n", stderr);
			}
		}
	}
	break;

	case JIT_TYPE_STRUCT:
	case JIT_TYPE_UNION:
	{
		if(IS_GENERAL_REG(reg))
		{
			if(IS_GENERAL_REG(src_reg))
			{
				x86_64_mov_reg_reg_size(inst, _jit_reg_info[reg].cpu_reg,
							_jit_reg_info[src_reg].cpu_reg, 8);
			}
			else if(IS_XMM_REG(src_reg))
			{
				x86_64_movq_reg_xreg(inst, _jit_reg_info[reg].cpu_reg,
						     _jit_reg_info[src_reg].cpu_reg);
			}
			else
			{
				fputs("Unsupported struct/union reg - reg move\n", stderr);
			}
		}
		else if(IS_XMM_REG(reg))
		{
			if(IS_GENERAL_REG(src_reg))
			{
				x86_64_movq_xreg_reg(inst, _jit_reg_info[reg].cpu_reg,
						     _jit_reg_info[src_reg].cpu_reg);
			}
			else if(IS_XMM_REG(src_reg))
			{
				x86_64_movaps_reg_reg(inst, _jit_reg_info[reg].cpu_reg,
						      _jit_reg_info[src_reg].cpu_reg);
			}
			else
			{
				fputs("Unsupported struct/union reg - reg move\n", stderr);
			}
		}
		else
		{
			fputs("Unsupported struct/union reg - reg move\n", stderr);
		}
	}
	}
}
else
{
	/* Fix the position of the value in the stack frame */
	_jit_gen_fix_value(value);
	offset = (int)(value->frame_offset);

	/* Load the value into the specified register */
	switch(type->kind)
	{
	case JIT_TYPE_SBYTE:
	{
		x86_64_movsx8_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
					       X86_64_RBP, offset, 4);
	}
	break;

	case JIT_TYPE_UBYTE:
	{
		x86_64_movzx8_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
					       X86_64_RBP, offset, 4);
	}
	break;

	case JIT_TYPE_SHORT:
	{
		x86_64_movsx16_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
						X86_64_RBP, offset, 4);
	}
	break;

	case JIT_TYPE_USHORT:
	{
		x86_64_movzx16_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
						X86_64_RBP, offset, 4);
	}
	break;

	case JIT_TYPE_INT:
	case JIT_TYPE_UINT:
	{
		x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
					    X86_64_RBP, offset, 4);
	}
	break;

	case JIT_TYPE_LONG:
	case JIT_TYPE_ULONG:
	{
		x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
					    X86_64_RBP, offset, 8);
	}
	break;

	case JIT_TYPE_FLOAT32:
	{
		if(IS_GENERAL_REG(reg))
		{
			x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
						    X86_64_RBP, offset, 4);
		}
		else if(IS_XMM_REG(reg))
		{
			x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
						 X86_64_RBP, offset);
		}
		else
		{
			x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
		}
	}
	break;

	case JIT_TYPE_FLOAT64:
	{
		if(IS_GENERAL_REG(reg))
		{
			x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
						    X86_64_RBP, offset, 8);
		}
		else if(IS_XMM_REG(reg))
		{
			x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
						 X86_64_RBP, offset);
		}
		else
		{
			x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
		}
	}
	break;

	case JIT_TYPE_NFLOAT:
	{
		if(sizeof(jit_nfloat) == sizeof(jit_float64))
		{
			if(IS_GENERAL_REG(reg))
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RBP, offset, 8);
			}
			else if(IS_XMM_REG(reg))
			{
				x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
							 X86_64_RBP, offset);
			}
			else
			{
				x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
			}
		}
		else
		{
			x86_64_fld_membase_size(inst, X86_64_RBP, offset, 10);
		}
	}
	break;

	case JIT_TYPE_STRUCT:
	case JIT_TYPE_UNION:
	{
		jit_nuint size = jit_type_get_size(type);

		if(IS_GENERAL_REG(reg))
		{
			if(size == 1)
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RBP, offset, 1);
			}
			else if(size == 2)
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RBP, offset, 2);
			}
			else if(size <= 4)
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RBP, offset, 4);
			}
			else if(size <= 8)
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RBP, offset, 8);
			}
		}
		else if(IS_XMM_REG(reg))
		{
			if(size <= 4)
			{
				x86_64_movss_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
							 X86_64_RBP, offset);
			}
			else if(size <= 8)
			{
				x86_64_movsd_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
							 X86_64_RBP, offset);
			}
			else
			{
				int alignment = jit_type_get_alignment(type);

				if((alignment & 0xf) == 0)
				{
					x86_64_movaps_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
								  X86_64_RBP, offset);
				}
				else
				{
					x86_64_movups_reg_membase(inst, _jit_reg_info[reg].cpu_reg,
								  X86_64_RBP, offset);
				}
			}
		}
	}
	}
}

/* End the code output process */
jit_cache_end_output();
}

void
_jit_gen_get_elf_info(jit_elf_info_t *info)
{
	info->machine = 62;	/* EM_X86_64 */
	info->abi = 0;		/* ELFOSABI_SYSV */
	info->abi_version = 0;
}
void *
_jit_gen_prolog(jit_gencode_t gen, jit_function_t func, void *buf)
{
	unsigned char prolog[JIT_PROLOG_SIZE];
	unsigned char *inst = prolog;
	int reg;
	int frame_size = 0;
	int regs_to_save = 0;

	/* Push RBP onto the stack */
	x86_64_push_reg_size(inst, X86_64_RBP, 8);

	/* Initialize RBP for the current frame */
	x86_64_mov_reg_reg_size(inst, X86_64_RBP, X86_64_RSP, 8);

	/* Allocate space for the local variable frame */
	if(func->builder->frame_size > 0)
	{
		/* Make sure that the frame size is a multiple of 8 bytes */
		frame_size = (func->builder->frame_size + 0x7) & ~0x7;
	}

	/* Get the number of registers we need to preserve */
	for(reg = 0; reg < 14; ++reg)
	{
		if(jit_reg_is_used(gen->touched, reg) &&
		   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
		{
			++regs_to_save;
		}
	}

	/* Add the register save area to the initial frame size */
	frame_size += (regs_to_save << 3);

#ifdef JIT_USE_PARAM_AREA
	/* Add the param area to the frame_size if the additional offset
	   doesn't force the offsets in the register saves to grow to
	   4-byte displacements */
	if(func->builder->param_area_size > 0 &&
	   (func->builder->param_area_size <= 0x50 || regs_to_save == 0))
	{
		frame_size += func->builder->param_area_size;
	}
#endif /* JIT_USE_PARAM_AREA */

	/* Make sure that the frame size is a multiple of 16 bytes
	   so that the final RSP will be aligned on a 16-byte boundary. */
	frame_size = (frame_size + 0xf) & ~0xf;

	if(frame_size > 0)
	{
		x86_64_sub_reg_imm_size(inst, X86_64_RSP, frame_size, 8);
	}

	if(regs_to_save > 0)
	{
		int current_offset;
#ifdef JIT_USE_PARAM_AREA
		if(func->builder->param_area_size > 0 &&
		   func->builder->param_area_size <= 0x50)
		{
			current_offset = func->builder->param_area_size;
		}
		else
#endif /* JIT_USE_PARAM_AREA */
		{
			current_offset = 0;
		}

		/* Save registers that we need to preserve */
		for(reg = 0; reg <= 14; ++reg)
		{
			if(jit_reg_is_used(gen->touched, reg) &&
			   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
			{
				x86_64_mov_membase_reg_size(inst, X86_64_RSP, current_offset,
							    _jit_reg_info[reg].cpu_reg, 8);
				current_offset += 8;
			}
		}
	}
#ifdef JIT_USE_PARAM_AREA
	if(func->builder->param_area_size > 0x50 && regs_to_save > 0)
	{
		x86_64_sub_reg_imm_size(inst, X86_64_RSP, func->builder->param_area_size, 8);
	}
#endif /* JIT_USE_PARAM_AREA */

	/* Copy the prolog into place and return the adjusted entry position */
	reg = (int)(inst - prolog);
	jit_memcpy(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg, prolog, reg);
	return (void *)(((unsigned char *)buf) + JIT_PROLOG_SIZE - reg);
}
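/*
 * Illustrative sketch (hypothetical, for illustration only): the prolog
 * above rounds the local frame to 8 bytes, appends one eightbyte per
 * preserved register, optionally folds in the outgoing parameter area,
 * and finally rounds the total to 16 bytes so that RSP stays 16-byte
 * aligned after the `push rbp`. The standalone demo below reproduces
 * just that arithmetic for a frame of 20 bytes and 3 saved registers.
 */
#if 0
#include <stdio.h>

static int prolog_frame_size(int locals, int regs_to_save, int param_area)
{
	int frame_size = (locals + 0x7) & ~0x7;	/* round locals up to 8 */
	frame_size += regs_to_save << 3;	/* one eightbyte per register */
	if(param_area > 0 && (param_area <= 0x50 || regs_to_save == 0))
	{
		frame_size += param_area;	/* folded-in parameter area */
	}
	return (frame_size + 0xf) & ~0xf;	/* round the total up to 16 */
}

int main(void)
{
	/* 20 bytes of locals, 3 saved registers, no param area: 24 + 24 = 48 */
	printf("%d\n", prolog_frame_size(20, 3, 0));
	return 0;
}
#endif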
void
_jit_gen_epilog(jit_gencode_t gen, jit_function_t func)
{
	unsigned char *inst;
	int reg;
	int current_offset;
	jit_int *fixup;
	jit_int *next;

	/* Bail out if there is insufficient space for the epilog */
	_jit_gen_check_space(gen, 48);

	inst = gen->ptr;

	/* Perform fixups on any blocks that jump to the epilog */
	fixup = (jit_int *)(gen->epilog_fixup);
	while(fixup != 0)
	{
		if(DEBUG_FIXUPS)
		{
			fprintf(stderr, "Fixup Address: %lx, Value: %x\n",
				(jit_nint)fixup, fixup[0]);
		}
		next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]);
		fixup[0] = (jit_int)(((jit_nint)inst) - ((jit_nint)fixup) - 4);
		fixup = next;
	}
	gen->epilog_fixup = 0;

	/* Perform fixups on any alloca calls */
	fixup = (jit_int *)(gen->alloca_fixup);
	while(fixup != 0)
	{
		next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]);
		fixup[0] = func->builder->param_area_size;
		if(DEBUG_FIXUPS)
		{
			fprintf(stderr, "Fixup Param Area Size: %lx, Value: %x\n",
				(jit_nint)fixup, fixup[0]);
		}
		fixup = next;
	}
	gen->alloca_fixup = 0;

	/* Restore the used callee saved registers */
	if(gen->stack_changed)
	{
		int frame_size = func->builder->frame_size;
		int regs_saved = 0;

		/* Get the number of registers we preserve */
		for(reg = 0; reg < 14; ++reg)
		{
			if(jit_reg_is_used(gen->touched, reg) &&
			   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
			{
				++regs_saved;
			}
		}

		/* Add the register save area to the initial frame size */
		frame_size += (regs_saved << 3);

		/* Make sure that the frame size is a multiple of 16 bytes
		   so that the final RSP will be aligned on a 16-byte boundary. */
		frame_size = (frame_size + 0xf) & ~0xf;

		current_offset = -frame_size;

		for(reg = 0; reg <= 14; ++reg)
		{
			if(jit_reg_is_used(gen->touched, reg) &&
			   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RBP, current_offset, 8);
				current_offset += 8;
			}
		}
	}
	else
	{
#ifdef JIT_USE_PARAM_AREA
		if(func->builder->param_area_size > 0)
		{
			current_offset = func->builder->param_area_size;
		}
		else
		{
			current_offset = 0;
		}
#else /* !JIT_USE_PARAM_AREA */
		current_offset = 0;
#endif /* !JIT_USE_PARAM_AREA */
		for(reg = 0; reg <= 14; ++reg)
		{
			if(jit_reg_is_used(gen->touched, reg) &&
			   (_jit_reg_info[reg].flags & JIT_REG_CALL_USED) == 0)
			{
				x86_64_mov_reg_membase_size(inst, _jit_reg_info[reg].cpu_reg,
							    X86_64_RSP, current_offset, 8);
				current_offset += 8;
			}
		}
	}

	/* Restore the stack pointer and frame register */
	x86_64_mov_reg_reg_size(inst, X86_64_RSP, X86_64_RBP, 8);
	x86_64_pop_reg_size(inst, X86_64_RBP, 8);

	/* and return */
	x86_64_ret(inst);

	gen->ptr = inst;
}
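/*
 * Illustrative sketch (hypothetical, for illustration only): pending
 * branches to the epilog are kept as a linked list threaded through the
 * 4-byte displacement slots themselves; each slot temporarily holds the
 * delta to the next slot, and _JIT_CALC_NEXT_FIXUP turns that delta back
 * into a pointer. The demo below mimics the walk with a simplified
 * stand-in macro: every slot is patched to `target - slot - 4`, the
 * rel32 form that x86 branch instructions expect.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for _JIT_CALC_NEXT_FIXUP: a zero delta ends the list. */
#define CALC_NEXT_FIXUP(fixup, delta) \
	((delta) ? (int32_t *)((int8_t *)(fixup) + (delta)) : (int32_t *)0)

int main(void)
{
	int8_t code[64] = {0};
	int32_t *a = (int32_t *)(code + 2);	/* first branch displacement slot */
	int32_t *b = (int32_t *)(code + 20);	/* second branch displacement slot */
	int8_t *target = code + 40;		/* where the epilog gets emitted */

	*b = 0;						/* end of the chain */
	*a = (int32_t)((int8_t *)b - (int8_t *)a);	/* link a -> b */

	for(int32_t *fixup = a; fixup != 0;)
	{
		int32_t *next = CALC_NEXT_FIXUP(fixup, fixup[0]);
		fixup[0] = (int32_t)(target - (int8_t *)fixup - 4);
		fixup = next;
	}
	printf("%d %d\n", *a, *b);	/* prints 34 and 16 */
	return 0;
}
#endif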
/*
 * Copy a small block. This generates inlined code.
 *
 * Set is_aligned to zero if the source or target locations might not be
 * aligned on a 16-byte boundary and to non-zero if both blocks are always
 * aligned.
 *
 * We assume that offset + size is in the range -2GB ... +2GB.
 */
static unsigned char *
small_block_copy(jit_gencode_t gen, unsigned char *inst,
		 int dreg, jit_nint doffset,
		 int sreg, jit_nint soffset, jit_int size,
		 int scratch_reg, int scratch_xreg, int is_aligned)
{
	jit_nint offset = 0;
	int i;

	/* Copy all 16-byte blocks of the struct */
	while(size >= 16)
	{
		if(is_aligned)
		{
			x86_64_movaps_reg_membase(inst, scratch_xreg,
						  sreg, soffset + offset);
			x86_64_movaps_membase_reg(inst, dreg, doffset + offset,
						  scratch_xreg);
		}
		else
		{
			x86_64_movups_reg_membase(inst, scratch_xreg,
						  sreg, soffset + offset);
			x86_64_movups_membase_reg(inst, dreg, doffset + offset,
						  scratch_xreg);
		}
		size -= 16;
		offset += 16;
	}

	/* Now copy the rest of the struct */
	for(i = 8; i > 0; i /= 2)
	{
		if(size >= i)
		{
			x86_64_mov_reg_membase_size(inst, scratch_reg, sreg,
						    soffset + offset, i);
			x86_64_mov_membase_reg_size(inst, dreg, doffset + offset,
						    scratch_reg, i);
			size -= i;
			offset += i;
		}
	}
	return inst;
}
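/*
 * Illustrative sketch (hypothetical, for illustration only): the tail loop
 * above finishes a copy with at most one 8-, one 4-, one 2- and one 1-byte
 * move, because after the 16-byte loop the remainder is below 16 and each
 * power of two can be needed at most once. The host-side analogue below
 * copies a 23-byte buffer the same way: one 16-byte block, then chunks of
 * 4, 2 and 1 for the remaining 7 bytes.
 */
#if 0
#include <stdio.h>
#include <string.h>

static void small_copy(char *dst, const char *src, int size)
{
	int offset = 0;
	while(size >= 16)	/* stands in for the movaps/movups loop */
	{
		memcpy(dst + offset, src + offset, 16);
		size -= 16;
		offset += 16;
	}
	for(int i = 8; i > 0; i /= 2)	/* stands in for the scratch-register tail */
	{
		if(size >= i)
		{
			memcpy(dst + offset, src + offset, i);
			printf("tail chunk of %d at offset %d\n", i, offset);
			size -= i;
			offset += i;
		}
	}
}

int main(void)
{
	char src[23] = "abcdefghijklmnopqrstuv";	/* 22 chars + NUL */
	char dst[23];
	small_copy(dst, src, 23);	/* prints chunks 4, 2 and 1 */
	return 0;
}
#endif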
/*
 * Copy a struct.
 * The size of the type must be <= 4 * 16 bytes.
 */
static unsigned char *
small_struct_copy(jit_gencode_t gen, unsigned char *inst,
		  int dreg, jit_nint doffset,
		  int sreg, jit_nint soffset, jit_type_t type,
		  int scratch_reg, int scratch_xreg)
{
	int size = jit_type_get_size(type);
	int alignment = jit_type_get_alignment(type);

	return small_block_copy(gen, inst, dreg, doffset,
				sreg, soffset, size, scratch_reg,
				scratch_xreg, ((alignment & 0xf) == 0));
}

/*
 * Copy a block of memory that has a specific size. All call clobbered
 * registers must be unused at this point.
 */
static unsigned char *
memory_copy(jit_gencode_t gen, unsigned char *inst,
	    int dreg, jit_nint doffset,
	    int sreg, jit_nint soffset, jit_nint size)
{
	if(dreg == X86_64_RDI)
	{
		if(sreg != X86_64_RSI)
		{
			x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8);
		}
	}
	else if(dreg == X86_64_RSI)
	{
		if(sreg == X86_64_RDI)
		{
			/* The registers are swapped so we need a temporary register */
			x86_64_mov_reg_reg_size(inst, X86_64_RCX, X86_64_RSI, 8);
			x86_64_mov_reg_reg_size(inst, X86_64_RSI, X86_64_RDI, 8);
			x86_64_mov_reg_reg_size(inst, X86_64_RDI, X86_64_RCX, 8);
		}
		else
		{
			x86_64_mov_reg_reg_size(inst, X86_64_RDI, X86_64_RSI, 8);
			if(sreg != X86_64_RSI)
			{
				x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8);
			}
		}
	}
	else
	{
		x86_64_mov_reg_reg_size(inst, X86_64_RSI, sreg, 8);
		x86_64_mov_reg_reg_size(inst, X86_64_RDI, dreg, 8);
	}
	/* Move the size to argument register 3 now */
	if((size > 0) && (size <= jit_max_uint))
	{
		x86_64_mov_reg_imm_size(inst, X86_64_RDX, size, 4);
	}
	else
	{
		x86_64_mov_reg_imm_size(inst, X86_64_RDX, size, 8);
	}
	if(soffset != 0)
	{
		x86_64_add_reg_imm_size(inst, X86_64_RSI, soffset, 8);
	}
	if(doffset != 0)
	{
		x86_64_add_reg_imm_size(inst, X86_64_RDI, doffset, 8);
	}
	inst = x86_64_call_code(inst, (jit_nint)jit_memcpy);
	return inst;
}
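/*
 * Illustrative sketch (hypothetical, for illustration only): memory_copy
 * above marshals its operands into the SysV argument registers for the
 * call to jit_memcpy (dest in RDI, src in RSI, size in RDX). The only
 * delicate case is dreg == RSI with sreg == RDI, where a plain move
 * sequence would clobber one operand, so RCX is used as a temporary. The
 * demo below replays that swap with plain variables.
 */
#if 0
#include <stdio.h>

int main(void)
{
	/* pretend the operands arrived in each other's target registers */
	long rdi = 0x1000;	/* sreg: source address currently in RDI */
	long rsi = 0x2000;	/* dreg: dest address currently in RSI */
	long rcx;

	/* dreg == RSI && sreg == RDI: swap through the RCX temporary */
	rcx = rsi;
	rsi = rdi;
	rdi = rcx;

	printf("dest=%#lx src=%#lx\n", rdi, rsi);	/* dest=0x2000 src=0x1000 */
	return 0;
}
#endif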
/*
 * Fill a small block. This generates inlined code.
 *
 * Set is_aligned to zero if the target location might not be aligned on a
 * 16-byte boundary and to non-zero if the block is always aligned.
 *
 * Set use_sse to zero to disable the use of SSE instructions (it will make
 * this function ignore scratch_xreg). Set it to non-zero otherwise.
 *
 * We assume that offset + size is in the range -2GB ... +2GB.
 */
static unsigned char *
small_block_set(jit_gencode_t gen, unsigned char *inst,
		int dreg, jit_nint doffset,
		jit_nuint val, jit_nint size,
		int scratch_reg, int scratch_xreg,
		int is_aligned, int use_sse)
{
	jit_nint offset = 0;
	int i;

	/* Make sure only the least significant byte serves as the filler. */
	val &= 0xff;

	/* Load the filler into a register. */
	if(val == 0)
	{
		if(!use_sse || (size % 16) != 0)
		{
			x86_64_clear_reg(inst, scratch_reg);
		}
	}
	else
	{
		val |= val << 8;
		val |= val << 16;
		val |= val << 32;
		x86_64_mov_reg_imm_size(inst, scratch_reg, val, 8);
	}

	/* Fill all 16-byte blocks */
	if(use_sse)
	{
		if(val == 0)
		{
			x86_64_clear_xreg(inst, scratch_xreg);
		}
		else
		{
			x86_64_movq_xreg_reg(inst, scratch_xreg, scratch_reg);
			x86_64_movlhps(inst, scratch_xreg, scratch_xreg);
		}

		while(size >= 16)
		{
			if(is_aligned)
			{
				x86_64_movaps_membase_reg(inst, dreg, doffset + offset,
							  scratch_xreg);
			}
			else
			{
				x86_64_movups_membase_reg(inst, dreg, doffset + offset,
							  scratch_xreg);
			}
			size -= 16;
			offset += 16;
		}
	}

	/* Now fill the rest */
	for(i = 8; i > 0; i /= 2)
	{
		while(size >= i)
		{
			x86_64_mov_membase_reg_size(inst, dreg, doffset + offset,
						    scratch_reg, i);
			size -= i;
			offset += i;
		}
	}
	return inst;
}
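/*
 * Illustrative sketch (hypothetical, for illustration only): the shift-or
 * cascade above broadcasts the fill byte into all eight byte lanes of a
 * 64-bit register, and movq + movlhps then doubles that into a full
 * 128-bit XMM pattern. A standalone check of the 64-bit part:
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t val = 0x41;	/* fill byte 'A' */
	val |= val << 8;	/* 0x4141 */
	val |= val << 16;	/* 0x41414141 */
	val |= val << 32;	/* 0x4141414141414141 */
	printf("%#llx\n", (unsigned long long)val);
	return 0;
}
#endif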
void
_jit_gen_start_block(jit_gencode_t gen, jit_block_t block)
{
	jit_int *fixup;
	jit_int *next;
	void **absolute_fixup;
	void **absolute_next;

	/* Set the address of this block */
	block->address = (void *)(gen->ptr);

	/* If this block has pending fixups, then apply them now */
	fixup = (jit_int *)(block->fixup_list);
	if(DEBUG_FIXUPS && fixup)
	{
		fprintf(stderr, "Block: %lx\n", (jit_nint)block);
	}
	while(fixup != 0)
	{
		if(DEBUG_FIXUPS)
		{
			fprintf(stderr, "Fixup Address: %lx, Value: %x\n",
				(jit_nint)fixup, fixup[0]);
		}
		next = (jit_int *)_JIT_CALC_NEXT_FIXUP(fixup, fixup[0]);
		fixup[0] = (jit_int)
			(((jit_nint)(block->address)) - ((jit_nint)fixup) - 4);
		fixup = next;
	}
	block->fixup_list = 0;

	/* Absolute fixups contain complete pointers */
	absolute_fixup = (void **)(block->fixup_absolute_list);
	while(absolute_fixup != 0)
	{
		absolute_next = (void **)(absolute_fixup[0]);
		absolute_fixup[0] = (void *)((jit_nint)(block->address));
		absolute_fixup = absolute_next;
	}
	block->fixup_absolute_list = 0;
}

void
_jit_gen_end_block(jit_gencode_t gen, jit_block_t block)
{
	/* Nothing to do here for x86 */
}

int
_jit_gen_is_global_candidate(jit_type_t type)
{
	switch(jit_type_remove_tags(type)->kind)
	{
	case JIT_TYPE_SBYTE:
	case JIT_TYPE_UBYTE:
	case JIT_TYPE_SHORT:
	case JIT_TYPE_USHORT:
	case JIT_TYPE_INT:
	case JIT_TYPE_UINT:
	case JIT_TYPE_LONG:
	case JIT_TYPE_ULONG:
	case JIT_TYPE_NINT:
	case JIT_TYPE_NUINT:
	case JIT_TYPE_PTR:
	case JIT_TYPE_SIGNATURE:
	{
		return 1;
	}
	}
	return 0;
}

/*
 * Handle the work that jit-rules.c usually does for native implementations
 * here as well, because the common implementation is not sufficient for
 * x86_64.
 */

/*
 * Determine if a type corresponds to a structure or union.
 */
static int
is_struct_or_union(jit_type_t type)
{
	type = jit_type_normalize(type);
	if(type)
	{
		if(type->kind == JIT_TYPE_STRUCT || type->kind == JIT_TYPE_UNION)
		{
			return 1;
		}
	}
	return 0;
}

static int
_jit_classify_struct_return(jit_param_passing_t *passing,
			    _jit_param_t *param, jit_type_t return_type)
{
	/* Initialize the param passing structure */
	jit_memset(passing, 0, sizeof(jit_param_passing_t));
	jit_memset(param, 0, sizeof(_jit_param_t));

	passing->word_regs = _jit_word_return_regs;
	passing->max_word_regs = _jit_num_word_return_regs;
	passing->float_regs = _jit_sse_return_regs;
	passing->max_float_regs = _jit_num_sse_return_regs;

	if(!(_jit_classify_struct(passing, param, return_type)))
	{
		return 0;
	}

	return 1;
}

/*
 * Load a struct to the register(s) in which it will be returned.
 */
static unsigned char *
return_struct(unsigned char *inst, jit_function_t func, int ptr_reg)
{
	jit_type_t return_type;
	jit_type_t signature = jit_function_get_signature(func);

	return_type = jit_type_get_return(signature);
	if(is_struct_or_union(return_type))
	{
		jit_nuint size;
		jit_param_passing_t passing;
		_jit_param_t return_param;

		if(!_jit_classify_struct_return(&passing, &return_param,
						return_type))
		{
			/* It's an error, so simply return inst unchanged */
			return inst;
		}

		size = jit_type_get_size(return_type);
		if(size <= 8)
		{
			/* one register is used for returning the value */
			if(IS_GENERAL_REG(return_param.un.reg_info[0].reg))
			{
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(size <= 4)
				{
					x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 4);
				}
				else
				{
					x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 8);
				}
			}
			else
			{
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(size <= 4)
				{
					x86_64_movss_reg_regp(inst, reg, ptr_reg);
				}
				else
				{
					x86_64_movsd_reg_regp(inst, reg, ptr_reg);
				}
			}
		}
		else
		{
			/* In this case we might need up to two registers */
			if(return_param.arg_class == 1)
			{
				/* This must be one xmm register */
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;
				int alignment = jit_type_get_alignment(return_type);

				if((alignment & 0xf) == 0)
				{
					/* The type is aligned on a 16 byte boundary */
					x86_64_movaps_reg_regp(inst, reg, ptr_reg);
				}
				else
				{
					x86_64_movups_reg_regp(inst, reg, ptr_reg);
				}
			}
			else
			{
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(IS_GENERAL_REG(return_param.un.reg_info[0].reg))
				{
					x86_64_mov_reg_regp_size(inst, reg, ptr_reg, 8);
				}
				else
				{
					x86_64_movsd_reg_regp(inst, reg, ptr_reg);
				}
				size -= 8;
				reg = _jit_reg_info[return_param.un.reg_info[1].reg].cpu_reg;
				if(IS_GENERAL_REG(return_param.un.reg_info[1].reg))
				{
					if(size <= 4)
					{
						x86_64_mov_reg_membase_size(inst, reg, ptr_reg,
									    8, 4);
					}
					else
					{
						x86_64_mov_reg_membase_size(inst, reg, ptr_reg,
									    8, 8);
					}
				}
				else
				{
					if(size <= 4)
					{
						x86_64_movss_reg_membase(inst, reg, ptr_reg, 8);
					}
					else
					{
						x86_64_movsd_reg_membase(inst, reg, ptr_reg, 8);
					}
				}
			}
		}
	}
	return inst;
}

/*
 * Flush a struct return value from the registers to the value
 * on the stack.
 */
static unsigned char *
flush_return_struct(unsigned char *inst, jit_value_t value)
{
	jit_type_t return_type;

	return_type = jit_value_get_type(value);
	if(is_struct_or_union(return_type))
	{
		jit_nuint size;
		jit_nint offset;
		jit_param_passing_t passing;
		_jit_param_t return_param;

		if(!_jit_classify_struct_return(&passing, &return_param, return_type))
		{
			/* It's an error, so simply return inst unchanged */
			return inst;
		}

		return_param.value = value;

		_jit_gen_fix_value(value);
		size = jit_type_get_size(return_type);
		offset = value->frame_offset;
		if(size <= 8)
		{
			/* one register is used for returning the value */
			if(IS_GENERAL_REG(return_param.un.reg_info[0].reg))
			{
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(size <= 4)
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, reg, 4);
				}
				else
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset, reg, 8);
				}
			}
			else
			{
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(size <= 4)
				{
					x86_64_movss_membase_reg(inst, X86_64_RBP, offset, reg);
				}
				else
				{
					x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, reg);
				}
			}
		}
		else
		{
			/* In this case we might need up to two registers */
			if(return_param.arg_class == 1)
			{
				/* This must be one xmm register */
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;
				int alignment = jit_type_get_alignment(return_type);

				if((alignment & 0xf) == 0)
				{
					/* The type is aligned on a 16 byte boundary */
					x86_64_movaps_membase_reg(inst, X86_64_RBP, offset, reg);
				}
				else
				{
					x86_64_movups_membase_reg(inst, X86_64_RBP, offset, reg);
				}
			}
			else
			{
				int reg = _jit_reg_info[return_param.un.reg_info[0].reg].cpu_reg;

				if(IS_GENERAL_REG(return_param.un.reg_info[0].reg))
				{
					x86_64_mov_membase_reg_size(inst, X86_64_RBP, offset,
								    reg, 8);
				}
				else
				{
					x86_64_movsd_membase_reg(inst, X86_64_RBP, offset, reg);
				}
				size -= 8;
				reg = _jit_reg_info[return_param.un.reg_info[1].reg].cpu_reg;
				if(IS_GENERAL_REG(return_param.un.reg_info[1].reg))
				{
					if(size <= 4)
					{
						x86_64_mov_membase_reg_size(inst, X86_64_RBP,
									    offset + 8, reg, 4);
					}
					else
					{
						x86_64_mov_membase_reg_size(inst, X86_64_RBP,
									    offset + 8, reg, 8);
					}
				}
				else
				{
					if(size <= 4)
					{
						x86_64_movss_membase_reg(inst, X86_64_RBP,
									 offset + 8, reg);
					}
					else
					{
						x86_64_movsd_membase_reg(inst, X86_64_RBP,
									 offset + 8, reg);
					}
				}
			}
		}
	}
	return inst;
}
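/*
 * Illustrative sketch (hypothetical, for illustration only): under the
 * SysV x86_64 ABI a small aggregate is split into eightbytes, and each
 * eightbyte travels in the next free INTEGER return register (RAX, RDX)
 * or SSE return register (XMM0, XMM1) according to its contents, which is
 * what the reg_info[0]/reg_info[1] pairs above encode. Two examples:
 */
#if 0
struct int_pair { long a; int b; };	/* eightbyte 0 -> RAX, eightbyte 1 -> RDX  */
struct mixed    { double d; long l; };	/* eightbyte 0 -> XMM0, eightbyte 1 -> RAX */
#endif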
void
_jit_gen_insn(jit_gencode_t gen, jit_function_t func,
	      jit_block_t block, jit_insn_t insn)
{
	switch(insn->opcode)
	{
#define JIT_INCLUDE_RULES
#include "jit-rules-x86-64.inc"
#undef JIT_INCLUDE_RULES

	default:
	{
		fprintf(stderr, "TODO(%x) at %s, %d\n",
			(int)(insn->opcode), __FILE__, (int)__LINE__);
	}
	break;
	}
}

/*
 * Fix up the passing area after all parameters have been allocated either
 * in registers or on the stack. This is typically used to add pad words
 * that keep the stack aligned.
 */
void
_jit_fix_call_stack(jit_param_passing_t *passing)
{
	if((passing->stack_size & 0x0f) != 0)
	{
		passing->stack_size = (passing->stack_size + 0x0f) & ~((jit_nint)0x0f);
		passing->stack_pad = 1;
	}
}
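/*
 * Illustrative sketch (hypothetical, for illustration only): outgoing
 * stack arguments are padded up to the next 16-byte multiple so that RSP
 * is correctly aligned at the call. A worked case: 24 bytes of stack
 * arguments round up to 32, and stack_pad records that an 8-byte pad
 * word is needed.
 */
#if 0
#include <stdio.h>

int main(void)
{
	long stack_size = 24;	/* three stack words */
	int stack_pad = 0;

	if((stack_size & 0x0f) != 0)
	{
		stack_size = (stack_size + 0x0f) & ~0x0fL;
		stack_pad = 1;
	}
	printf("size=%ld pad=%d\n", stack_size, stack_pad);	/* size=32 pad=1 */
	return 0;
}
#endif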
#ifndef JIT_USE_PARAM_AREA
/*
 * Set up the call stack before pushing any parameters.
 * This is usually used for pushing pad words for alignment.
 * The function is needed only if the backend doesn't work with the
 * parameter area.
 */
int
_jit_setup_call_stack(jit_function_t func, jit_param_passing_t *passing)
{
	if(passing->stack_pad)
	{
		int current;
		jit_value_t pad_value;

		pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0);
		if(!pad_value)
		{
			return 0;
		}
		for(current = 0; current < passing->stack_pad; ++current)
		{
			if(!jit_insn_push(func, pad_value))
			{
				return 0;
			}
		}
	}
	return 1;
}
#endif /* !JIT_USE_PARAM_AREA */

/*
 * Push a parameter onto the stack.
 */
static int
push_param(jit_function_t func, _jit_param_t *param, jit_type_t type)
{
	if(is_struct_or_union(type) && !is_struct_or_union(param->value->type))
	{
		jit_value_t value;

		if(!(value = jit_insn_address_of(func, param->value)))
		{
			return 0;
		}
#ifdef JIT_USE_PARAM_AREA
		/* Copy the value into the outgoing parameter area, by pointer */
		if(!jit_insn_set_param_ptr(func, value, type, param->un.offset))
		{
			return 0;
		}
#else
		/* Push the parameter value onto the stack, by pointer */
		if(!jit_insn_push_ptr(func, value, type))
		{
			return 0;
		}
		if(param->stack_pad)
		{
			int current;
			jit_value_t pad_value;

			pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0);
			if(!pad_value)
			{
				return 0;
			}
			for(current = 0; current < param->stack_pad; ++current)
			{
				if(!jit_insn_push(func, pad_value))
				{
					return 0;
				}
			}
		}
#endif
	}
	else
	{
#ifdef JIT_USE_PARAM_AREA
		/* Copy the value into the outgoing parameter area */
		if(!jit_insn_set_param(func, param->value, param->un.offset))
		{
			return 0;
		}
#else
		/* Push the parameter value onto the stack */
		if(!jit_insn_push(func, param->value))
		{
			return 0;
		}
		if(param->stack_pad)
		{
			int current;
			jit_value_t pad_value;

			pad_value = jit_value_create_nint_constant(func, jit_type_nint, 0);
			if(!pad_value)
			{
				return 0;
			}
			for(current = 0; current < param->stack_pad; ++current)
			{
				if(!jit_insn_push(func, pad_value))
				{
					return 0;
				}
			}
		}
#endif
	}
	return 1;
}

int
_jit_setup_reg_param(jit_function_t func, _jit_param_t *param,
		     jit_type_t param_type)
{
	if(param->arg_class == 1)
	{
		param->un.reg_info[0].value = param->value;
	}
	else if(param->arg_class == 2)
	{
		jit_nint size = jit_type_get_size(param_type);
		jit_value_t value_ptr;

		if(!(value_ptr = jit_insn_address_of(func, param->value)))
		{
			return 0;
		}
		if(IS_GENERAL_REG(param->un.reg_info[0].reg))
		{
			param->un.reg_info[0].value =
				jit_insn_load_relative(func, value_ptr, 0, jit_type_long);
			if(!(param->un.reg_info[0].value))
			{
				return 0;
			}
		}
		else
		{
			param->un.reg_info[0].value =
				jit_insn_load_relative(func, value_ptr, 0, jit_type_float64);
			if(!(param->un.reg_info[0].value))
			{
				return 0;
			}
		}
		size -= 8;
		if(IS_GENERAL_REG(param->un.reg_info[1].reg))
		{
			if(size <= 4)
			{
				param->un.reg_info[1].value =
					jit_insn_load_relative(func, value_ptr, 8, jit_type_int);
				if(!(param->un.reg_info[1].value))
				{
					return 0;
				}
			}
			else
			{
				param->un.reg_info[1].value =
					jit_insn_load_relative(func, value_ptr, 8, jit_type_long);
				if(!(param->un.reg_info[1].value))
				{
					return 0;
				}
			}
		}
		else
		{
			if(size <= 4)
			{
				param->un.reg_info[1].value =
					jit_insn_load_relative(func, value_ptr, 8, jit_type_float32);
				if(!(param->un.reg_info[1].value))
				{
					return 0;
				}
			}
			else
			{
				param->un.reg_info[1].value =
					jit_insn_load_relative(func, value_ptr, 8, jit_type_float64);
				if(!(param->un.reg_info[1].value))
				{
					return 0;
				}
			}
		}
	}
	return 1;
}
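/*
 * Illustrative sketch (hypothetical, for illustration only): for an
 * arg_class == 2 parameter the struct is reloaded as two independent
 * eightbytes at offsets 0 and 8, each typed after the register that will
 * carry it. For a hypothetical struct { jit_int a, b, c; } (size 12) that
 * amounts to the two loads below.
 */
#if 0
#include <jit/jit.h>

static void split_eightbytes(jit_function_t func, jit_value_t strct)
{
	jit_value_t ptr = jit_insn_address_of(func, strct);
	/* eightbyte 0 is always full: 8 bytes at offset 0 */
	jit_value_t lo = jit_insn_load_relative(func, ptr, 0, jit_type_long);
	/* eightbyte 1 has size - 8 == 4 bytes left, so load an int */
	jit_value_t hi = jit_insn_load_relative(func, ptr, 8, jit_type_int);
	(void)lo;
	(void)hi;	/* these become reg_info[0].value and reg_info[1].value */
}
#endif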
int
_jit_flush_incoming_struct(jit_function_t func, _jit_param_t *param,
			   jit_type_t param_type)
{
	if(param->arg_class == 2)
	{
		jit_value_t address;

		/* Now store the two values in place */
		if(!(address = jit_insn_address_of(func, param->value)))
		{
			return 0;
		}
		if(!jit_insn_store_relative(func, address, 0, param->un.reg_info[0].value))
		{
			return 0;
		}
		if(!jit_insn_store_relative(func, address, 8, param->un.reg_info[1].value))
		{
			return 0;
		}
	}
	return 1;
}

int
_jit_setup_incoming_param(jit_function_t func, _jit_param_t *param,
			  jit_type_t param_type)
{
	if(param->arg_class == JIT_ARG_CLASS_STACK)
	{
		/* The parameter is passed on the stack */
		if(!jit_insn_incoming_frame_posn(func, param->value, param->un.offset))
		{
			return 0;
		}
	}
	else
	{
		param_type = jit_type_remove_tags(param_type);

		switch(param_type->kind)
		{
		case JIT_TYPE_STRUCT:
		case JIT_TYPE_UNION:
		{
			if(param->arg_class == 1)
			{
				if(!jit_insn_incoming_reg(func, param->value, param->un.reg_info[0].reg))
				{
					return 0;
				}
			}
			else
			{
				/* These cases have to be handled specially:
				   the struct is passed in two registers */
				jit_nuint size = jit_type_get_size(param_type);

				/* The first part is always a full eightbyte */
				if(IS_GENERAL_REG(param->un.reg_info[0].reg))
				{
					if(!(param->un.reg_info[0].value = jit_value_create(func, jit_type_long)))
					{
						return 0;
					}
				}
				else
				{
					if(!(param->un.reg_info[0].value = jit_value_create(func, jit_type_float64)))
					{
						return 0;
					}
				}
				size -= 8;

				/* The second part might be of any size <= 8 */
				if(IS_GENERAL_REG(param->un.reg_info[1].reg))
				{
					if(size <= 4)
					{
						if(!(param->un.reg_info[1].value =
								jit_value_create(func, jit_type_int)))
						{
							return 0;
						}
					}
					else
					{
						if(!(param->un.reg_info[1].value =
								jit_value_create(func, jit_type_long)))
						{
							return 0;
						}
					}
				}
				else
				{
					if(size <= 4)
					{
						if(!(param->un.reg_info[1].value =
								jit_value_create(func, jit_type_float32)))
						{
							return 0;
						}
					}
					else
					{
						if(!(param->un.reg_info[1].value =
								jit_value_create(func, jit_type_float64)))
						{
							return 0;
						}
					}
				}
				if(!jit_insn_incoming_reg(func,
							  param->un.reg_info[0].value,
							  param->un.reg_info[0].reg))
				{
					return 0;
				}
				if(!jit_insn_incoming_reg(func,
							  param->un.reg_info[1].value,
							  param->un.reg_info[1].reg))
				{
					return 0;
				}
			}
		}
		break;

		default:
		{
			if(!jit_insn_incoming_reg(func, param->value, param->un.reg_info[0].reg))
			{
				return 0;
			}
		}
		break;
		}
	}
	return 1;
}

int
_jit_setup_outgoing_param(jit_function_t func, _jit_param_t *param,
			  jit_type_t param_type)
{
	if(param->arg_class == JIT_ARG_CLASS_STACK)
	{
		/* The parameter is passed on the stack */
		if(!push_param(func, param, param_type))
		{
			return 0;
		}
	}
	else
	{
		if(!jit_insn_outgoing_reg(func, param->un.reg_info[0].value,
					  param->un.reg_info[0].reg))
		{
			return 0;
		}
		if(param->arg_class == 2)
		{
			if(!jit_insn_outgoing_reg(func, param->un.reg_info[1].value,
						  param->un.reg_info[1].reg))
			{
				return 0;
			}
		}
	}
	return 1;
}

int
_jit_setup_return_value(jit_function_t func, jit_value_t return_value,
			jit_type_t return_type)
{
	/* Structure values must be flushed into the frame, and
	   everything else ends up in a register */
	if(is_struct_or_union(return_type))
	{
		jit_param_passing_t passing;
		_jit_param_t return_param;

		if(!_jit_classify_struct_return(&passing, &return_param, return_type))
		{
			/* It's an error, so fail */
			return 0;
		}

		if(return_param.arg_class == 1)
		{
			if(!jit_insn_return_reg(func, return_value,
						return_param.un.reg_info[0].reg))
			{
				return 0;
			}
		}
		else
		{
			if(!jit_insn_flush_struct(func, return_value))
			{
				return 0;
			}
		}
	}
	else if(return_type == jit_type_float32 ||
		return_type == jit_type_float64)
	{
		if(!jit_insn_return_reg(func, return_value, X86_64_REG_XMM0))
		{
			return 0;
		}
	}
	else if(return_type == jit_type_nfloat)
	{
		if(!jit_insn_return_reg(func, return_value, X86_64_REG_ST0))
		{
			return 0;
		}
	}
	else if(return_type->kind != JIT_TYPE_VOID)
	{
		if(!jit_insn_return_reg(func, return_value, X86_64_REG_RAX))
		{
			return 0;
		}
	}
	return 1;
}
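/*
 * Illustrative sketch (hypothetical, for illustration only): the dispatch
 * above implements the SysV return-register mapping -- integers and
 * pointers in RAX, float32/float64 in XMM0, nfloat on the x87 stack in
 * ST0, and small structs either in their classified register(s) or
 * flushed to the frame. The standalone libjit usage sketch below builds a
 * trivial `double f(void) { return 1.0; }`; when it is compiled, the
 * result is routed through XMM0 by the code above.
 */
#if 0
#include <jit/jit.h>

static jit_function_t build_f(jit_context_t ctx)
{
	jit_type_t sig = jit_type_create_signature(jit_abi_cdecl,
						   jit_type_float64, 0, 0, 1);
	jit_function_t f = jit_function_create(ctx, sig);
	jit_value_t one = jit_value_create_float64_constant(f, jit_type_float64, 1.0);
	jit_insn_return(f, one);	/* ends up in XMM0 per the mapping above */
	jit_function_compile(f);
	return f;
}
#endif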
void
_jit_init_args(int abi, jit_param_passing_t *passing)
{
	passing->max_word_regs = _jit_num_word_regs;
	passing->word_regs = _jit_word_arg_regs;
	passing->max_float_regs = _jit_num_float_regs;
	passing->float_regs = _jit_float_arg_regs;
}

int
_jit_create_entry_insns(jit_function_t func)
{
	jit_value_t value;
	int has_struct_return = 0;
	jit_type_t signature = func->signature;
	int abi = jit_type_get_abi(signature);
	unsigned int num_args = jit_type_num_params(signature);
	jit_param_passing_t passing;
	_jit_param_t param[num_args];
	_jit_param_t nested_param;
	_jit_param_t struct_return_param;
	int current_param;

	/* Reset the local variable frame size for this function */
	func->builder->frame_size = JIT_INITIAL_FRAME_SIZE;

	/* Initialize the param passing structure */
	jit_memset(&passing, 0, sizeof(jit_param_passing_t));
	jit_memset(param, 0, sizeof(_jit_param_t) * num_args);

	passing.params = param;
	passing.stack_size = JIT_INITIAL_STACK_OFFSET;

	/* Let the specific backend initialize its part of the params */
	_jit_init_args(abi, &passing);

	/* Allocate the structure return pointer */
	if((value = jit_value_get_struct_pointer(func)))
	{
		jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
		if(!(_jit_classify_param(&passing, &struct_return_param,
					 jit_type_void_ptr)))
		{
			return 0;
		}
		struct_return_param.value = value;
		has_struct_return = 1;
	}

	/* If the function is nested, then we need an extra parameter
	   to pass the pointer to the parent's local variable frame */
	if(func->nested_parent)
	{
		jit_memset(&nested_param, 0, sizeof(_jit_param_t));
		if(!(_jit_classify_param(&passing, &nested_param,
					 jit_type_void_ptr)))
		{
			return 0;
		}

		nested_param.value = jit_value_create(func, jit_type_void_ptr);
		jit_function_set_parent_frame(func, nested_param.value);
	}

	/* Let the backend classify the parameters */
	for(current_param = 0; current_param < num_args; current_param++)
	{
		jit_type_t param_type;

		param_type = jit_type_get_param(signature, current_param);
		param_type = jit_type_normalize(param_type);

		if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
					 param_type)))
		{
			return 0;
		}
	}

	/* Now we can set up the incoming parameters */
	for(current_param = 0; current_param < num_args; current_param++)
	{
		jit_type_t param_type;

		param_type = jit_type_get_param(signature, current_param);
		if(!(param[current_param].value))
		{
			if(!(param[current_param].value = jit_value_get_param(func, current_param)))
			{
				return 0;
			}
		}
		if(!_jit_setup_incoming_param(func, &(param[current_param]), param_type))
		{
			return 0;
		}
	}

	if(func->nested_parent)
	{
		if(!_jit_setup_incoming_param(func, &nested_param, jit_type_void_ptr))
		{
			return 0;
		}
	}

	if(has_struct_return)
	{
		if(!_jit_setup_incoming_param(func, &struct_return_param, jit_type_void_ptr))
		{
			return 0;
		}
	}

	/* Now we flush the incoming structs passed in registers */
	for(current_param = 0; current_param < num_args; current_param++)
	{
		if(param[current_param].arg_class != JIT_ARG_CLASS_STACK)
		{
			jit_type_t param_type;

			param_type = jit_type_get_param(signature, current_param);
			if(!_jit_flush_incoming_struct(func, &(param[current_param]),
						       param_type))
			{
				return 0;
			}
		}
	}

	return 1;
}
int _jit_create_call_setup_insns
	(jit_function_t func, jit_type_t signature,
	 jit_value_t *args, unsigned int num_args,
	 int is_nested, jit_value_t parent_frame,
	 jit_value_t *struct_return, int flags)
{
	int abi = jit_type_get_abi(signature);
	jit_type_t return_type;
	jit_value_t value;
	jit_value_t return_ptr;
	int current_param;
	jit_param_passing_t passing;
	_jit_param_t param[num_args];
	_jit_param_t nested_param;
	_jit_param_t struct_return_param;

	/* Initialize the param passing structure */
	jit_memset(&passing, 0, sizeof(jit_param_passing_t));
	jit_memset(param, 0, sizeof(_jit_param_t) * num_args);

	passing.params = param;
	passing.stack_size = 0;

	/* Let the specific backend initialize its part of the params */
	_jit_init_args(abi, &passing);

	/* Determine if we need an extra hidden parameter for returning a
	   structure */
	return_type = jit_type_get_return(signature);
	if(jit_type_return_via_pointer(return_type))
	{
		value = jit_value_create(func, return_type);
		if(!value)
		{
			return 0;
		}
		*struct_return = value;
		return_ptr = jit_insn_address_of(func, value);
		if(!return_ptr)
		{
			return 0;
		}
		jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
		struct_return_param.value = return_ptr;
		if(!(_jit_classify_param(&passing, &struct_return_param,
					 jit_type_void_ptr)))
		{
			return 0;
		}
	}
	else
	{
		*struct_return = 0;
		return_ptr = 0;
	}

	/* Determine how many parameters are going to end up in word registers,
	   and compute the largest stack size needed to pass stack parameters */
	if(is_nested)
	{
		jit_memset(&nested_param, 0, sizeof(_jit_param_t));
		if(!(_jit_classify_param(&passing, &nested_param,
					 jit_type_void_ptr)))
		{
			return 0;
		}

		nested_param.value = parent_frame;
	}

	/* Let the backend classify the parameters */
	for(current_param = 0; current_param < num_args; current_param++)
	{
		jit_type_t param_type;

		param_type = jit_type_get_param(signature, current_param);
		param_type = jit_type_normalize(param_type);

		if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
					 param_type)))
		{
			return 0;
		}
		/* Set the argument value */
		passing.params[current_param].value = args[current_param];
	}

	/* Let the backend do final adjustments to the passing area */
	_jit_fix_call_stack(&passing);

#ifdef JIT_USE_PARAM_AREA
	if(passing.stack_size > func->builder->param_area_size)
	{
		func->builder->param_area_size = passing.stack_size;
	}
#else
	/* Flush deferred stack pops from previous calls if too many
	   parameters have collected up on the stack since last time */
	if(!jit_insn_flush_defer_pop(func, 32 - passing.stack_size))
	{
		return 0;
	}

	if(!_jit_setup_call_stack(func, &passing))
	{
		return 0;
	}
#endif

	/* Now set up the arguments on the stack or in the registers in
	   reverse order. First process the params passed on the stack. */
	current_param = num_args;
	while(current_param > 0)
	{
		--current_param;
		if(param[current_param].arg_class == JIT_ARG_CLASS_STACK)
		{
			jit_type_t param_type;

			param_type = jit_type_get_param(signature, current_param);
			if(!_jit_setup_outgoing_param(func, &(param[current_param]), param_type))
			{
				return 0;
			}
		}
	}

	/* Handle the structure return pointer if it's passed on the stack */
	if(return_ptr)
	{
		if(struct_return_param.arg_class == JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_outgoing_param(func, &struct_return_param,
						      jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* Handle the parent's frame pointer if it's passed on the stack */
	if(is_nested)
	{
		if(nested_param.arg_class == JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_outgoing_param(func, &nested_param,
						      jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* Now set up the values passed in registers */
	current_param = num_args;
	while(current_param > 0)
	{
		--current_param;

		if(param[current_param].arg_class != JIT_ARG_CLASS_STACK)
		{
			jit_type_t param_type;

			param_type = jit_type_get_param(signature, current_param);
			if(!_jit_setup_reg_param(func, &(param[current_param]), param_type))
			{
				return 0;
			}
		}
	}

	/* Handle the parent's frame pointer if required */
	if(is_nested)
	{
		if(nested_param.arg_class != JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_reg_param(func, &nested_param,
						 jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* Handle the structure return pointer if required */
	if(return_ptr)
	{
		if(struct_return_param.arg_class != JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_reg_param(func, &struct_return_param,
						 jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* And finally assign the registers */
	current_param = num_args;
	while(current_param > 0)
	{
		--current_param;
		if(param[current_param].arg_class != JIT_ARG_CLASS_STACK)
		{
			jit_type_t param_type;

			param_type = jit_type_get_param(signature, current_param);
			if(!_jit_setup_outgoing_param(func, &(param[current_param]),
						      param_type))
			{
				return 0;
			}
		}
	}

	/* Handle the parent's frame pointer if required */
	if(is_nested)
	{
		if(nested_param.arg_class != JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_outgoing_param(func, &nested_param,
						      jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	/* Add the structure return pointer if required */
	if(return_ptr)
	{
		if(struct_return_param.arg_class != JIT_ARG_CLASS_STACK)
		{
			if(!_jit_setup_outgoing_param(func, &struct_return_param,
						      jit_type_void_ptr))
			{
				return 0;
			}
		}
	}

	return 1;
}

int
_jit_create_call_return_insns(jit_function_t func, jit_type_t signature,
			      jit_value_t *args, unsigned int num_args,
			      jit_value_t return_value, int is_nested)
{
	jit_type_t return_type;
	int ptr_return;
#ifndef JIT_USE_PARAM_AREA
	int abi = jit_type_get_abi(signature);
	int current_param;
	jit_param_passing_t passing;
	_jit_param_t param[num_args];
	_jit_param_t nested_param;
	_jit_param_t struct_return_param;
#endif /* !JIT_USE_PARAM_AREA */

	return_type = jit_type_normalize(jit_type_get_return(signature));
	ptr_return = jit_type_return_via_pointer(return_type);
#ifndef JIT_USE_PARAM_AREA
	/* Initialize the param passing structure */
	jit_memset(&passing, 0, sizeof(jit_param_passing_t));
	jit_memset(param, 0, sizeof(_jit_param_t) * num_args);

	passing.params = param;
	passing.stack_size = 0;

	/* Let the specific backend initialize its part of the params */
	_jit_init_args(abi, &passing);
	/* Determine how many parameters are going to end up in word registers,
	   and compute the largest stack size needed to pass stack parameters */
	if(is_nested)
	{
		jit_memset(&nested_param, 0, sizeof(_jit_param_t));
		if(!(_jit_classify_param(&passing, &nested_param,
					 jit_type_void_ptr)))
		{
			return 0;
		}
	}

	/* Determine if we need an extra hidden parameter for returning a
	   structure */
	if(ptr_return)
	{
		jit_memset(&struct_return_param, 0, sizeof(_jit_param_t));
		if(!(_jit_classify_param(&passing, &struct_return_param,
					 jit_type_void_ptr)))
		{
			return 0;
		}
	}

	/* Let the backend classify the parameters */
	for(current_param = 0; current_param < num_args; current_param++)
	{
		jit_type_t param_type;

		param_type = jit_type_get_param(signature, current_param);
		param_type = jit_type_normalize(param_type);

		if(!(_jit_classify_param(&passing, &(passing.params[current_param]),
					 param_type)))
		{
			return 0;
		}
	}

	/* Let the backend do final adjustments to the passing area */
	_jit_fix_call_stack(&passing);

	/* Pop the bytes from the system stack */
	if(passing.stack_size > 0)
	{
		if(!jit_insn_defer_pop_stack(func, passing.stack_size))
		{
			return 0;
		}
	}
#endif /* !JIT_USE_PARAM_AREA */

	/* Bail out now if we don't need to worry about return values */
	if(!return_value || ptr_return)
	{
		return 1;
	}

	if(!_jit_setup_return_value(func, return_value, return_type))
	{
		return 0;
	}

	/* Everything is back where it needs to be */
	return 1;
}

#endif /* JIT_BACKEND_X86_64 */