github.com/goccy/go-jit@v0.0.0-20200514131505-ff78d45cf6af/internal/ccall/jit-gen-arm.h (about)

     1  /*
     2   * jit-gen-arm.h - Code generation macros for the ARM processor.
     3   *
     4   * Copyright (C) 2003, 2004  Southern Storm Software, Pty Ltd.
     5   * Copyright (C) 2008, 2009  Michele Tartara  <mikyt@users.sourceforge.net>
     6   *
     7   * This file is part of the libjit library.
     8   *
     9   * The libjit library is free software: you can redistribute it and/or
    10   * modify it under the terms of the GNU Lesser General Public License
    11   * as published by the Free Software Foundation, either version 2.1 of
    12   * the License, or (at your option) any later version.
    13   *
    14   * The libjit library is distributed in the hope that it will be useful,
    15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
    16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    17   * Lesser General Public License for more details.
    18   *
    19   * You should have received a copy of the GNU Lesser General Public
    20   * License along with the libjit library.  If not, see
    21   * <http://www.gnu.org/licenses/>.
    22   */
    23  
    24  #ifndef	_JIT_GEN_ARM_H
    25  #define	_JIT_GEN_ARM_H
    26  
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <jit-rules-arm.h>
    29  
    30  #ifdef __cplusplus
    31  extern "C" {
    32  #endif
    33  
    34  /*
    35   * Register numbers.
    36   */
    37  typedef enum
    38  {
    39  	ARM_R0   = 0,
    40  	ARM_R1   = 1,
    41  	ARM_R2   = 2,
    42  	ARM_R3   = 3,
    43  	ARM_R4   = 4,
    44  	ARM_R5   = 5,
    45  	ARM_R6   = 6,
    46  	ARM_R7   = 7,
    47  	ARM_R8   = 8,
    48  	ARM_R9   = 9,
    49  	ARM_R10  = 10,
    50  	ARM_R11  = 11,
    51  	ARM_R12  = 12,
    52  	ARM_R13  = 13,
    53  	ARM_R14  = 14,
    54  	ARM_R15  = 15,
    55  	ARM_FP   = ARM_R11,			/* Frame pointer */
    56  	ARM_LINK = ARM_R14,			/* Link register */
    57  	ARM_PC   = ARM_R15,			/* Program counter */
    58  	ARM_WORK = ARM_R12,			/* Work register that we can destroy */
    59  	ARM_SP   = ARM_R13			/* Stack pointer */
    60  
    61  } ARM_REG;
    62  
    63  #ifdef JIT_ARM_HAS_FPA
    64  /*
    65   * Floating-point register numbers for the FPA architecture.
    66   */
    67  typedef enum
    68  {
    69  	ARM_F0	= 0,
    70  	ARM_F1	= 1,
    71  	ARM_F2	= 2,
    72  	ARM_F3	= 3,
    73  	ARM_F4	= 4,
    74  	ARM_F5	= 5,
    75  	ARM_F6	= 6,
    76  	ARM_F7	= 7
    77  
    78  } ARM_FREG;
    79  #endif
    80  
    81  #ifdef JIT_ARM_HAS_VFP
    82  /*
    83   * Floating-point register numbers for the Vector Floating Point architecture.
    84   */
    85  typedef enum
    86  {
    87  	ARM_S0	= 0,
    88  	ARM_S1	= 1,
    89  	ARM_S2	= 2,
    90  	ARM_S3	= 3,
    91  	ARM_S4	= 4,
    92  	ARM_S5	= 5,
    93  	ARM_S6	= 6,
    94  	ARM_S7	= 7,
    95  	ARM_S8	= 8,
    96  	ARM_S9	= 9,
    97  	ARM_S10	= 10,
    98  	ARM_S11	= 11,
    99  	ARM_S12	= 12,
   100  	ARM_S13	= 13,
   101  	ARM_S14	= 14,
   102  	ARM_S15	= 15,
   103  	ARM_D8 = 8,
   104  	ARM_D9 = 9,
   105  	ARM_D10 = 10,
   106  	ARM_D11 = 11,
   107  	ARM_D12 = 12,
   108  	ARM_D13 = 13,
   109  	ARM_D14 = 14,
   110  	ARM_D15 = 15
   111  } ARM_FREG;
   112  #endif
   113  
   114  /*
   115   * Condition codes.
   116   */
   117  typedef enum
   118  {
   119  	ARM_CC_EQ    = 0,			/* Equal */
   120  	ARM_CC_NE    = 1,			/* Not equal */
   121  	ARM_CC_CS    = 2,			/* Carry set */
   122  	ARM_CC_CC    = 3,			/* Carry clear */
   123  	ARM_CC_MI    = 4,			/* Negative */
   124  	ARM_CC_PL    = 5,			/* Positive */
   125  	ARM_CC_VS    = 6,			/* Overflow set */
   126  	ARM_CC_VC    = 7,			/* Overflow clear */
   127  	ARM_CC_HI    = 8,			/* Higher */
   128  	ARM_CC_LS    = 9,			/* Lower or same */
   129  	ARM_CC_GE    = 10,			/* Signed greater than or equal */
   130  	ARM_CC_LT    = 11,			/* Signed less than */
   131  	ARM_CC_GT    = 12,			/* Signed greater than */
   132  	ARM_CC_LE    = 13,			/* Signed less than or equal */
   133  	ARM_CC_AL    = 14,			/* Always */
   134  	ARM_CC_NV    = 15,			/* Never */
   135  	ARM_CC_GE_UN = ARM_CC_CS,	/* Unsigned greater than or equal */
   136  	ARM_CC_LT_UN = ARM_CC_CC,	/* Unsigned less than */
   137  	ARM_CC_GT_UN = ARM_CC_HI,	/* Unsigned greater than */
   138  	ARM_CC_LE_UN = ARM_CC_LS	/* Unsigned less than or equal */
   139  
   140  } ARM_CC;
   141  
   142  /*
   143   * Arithmetic and logical operations.
   144   */
   145  typedef enum
   146  {
   147  	ARM_AND = 0,				/* Bitwise AND */
   148  	ARM_EOR = 1,				/* Bitwise XOR */
   149  	ARM_SUB = 2,				/* Subtract */
   150  	ARM_RSB = 3,				/* Reverse subtract */
   151  	ARM_ADD = 4,				/* Add */
   152  	ARM_ADC = 5,				/* Add with carry */
   153  	ARM_SBC = 6,				/* Subtract with carry */
   154  	ARM_RSC = 7,				/* Reverse subtract with carry */
   155  	ARM_TST = 8,				/* Test with AND */
   156  	ARM_TEQ = 9,				/* Test with XOR */
   157  	ARM_CMP = 10,				/* Test with SUB (compare) */
   158  	ARM_CMN = 11,				/* Test with ADD */
   159  	ARM_ORR = 12,				/* Bitwise OR */
   160  	ARM_MOV = 13,				/* Move */
   161  	ARM_BIC = 14,				/* Test with Op1 & ~Op2 */
   162  	ARM_MVN = 15				/* Bitwise NOT: Negate the content of a word*/
   163  
   164  } ARM_OP;
   165  
   166  /*
   167   * Shift operators.
   168   */
   169  typedef enum
   170  {
   171  	ARM_SHL = 0,				/* Logical left */
   172  	ARM_SHR = 1,				/* Logical right */
   173  	ARM_SAR = 2,				/* Arithmetic right */
   174  	ARM_ROR = 3					/* Rotate right */
   175  
   176  } ARM_SHIFT;
   177  
   178  #ifdef JIT_ARM_HAS_FPA
   179  /* Floating point definitions for the FPA architecture */
   180  
   181  /*
   182   * Floating-point unary operators.
   183   */
   184  typedef enum
   185  {
   186  	ARM_MVF		= 0,			/* Move */
   187  	ARM_MNF		= 1,			/* Move negative */
   188  	ARM_ABS		= 2,			/* Absolute value */
   189  	ARM_RND		= 3,			/* Round */
   190  	ARM_SQT		= 4,			/* Square root */
   191  	ARM_LOG		= 5,			/* log10 */
   192  	ARM_LGN		= 6,			/* ln */
   193  	ARM_EXP		= 7,			/* exp */
   194  	ARM_SIN		= 8,			/* sin */
   195  	ARM_COS		= 9,			/* cos */
   196  	ARM_TAN		= 10,			/* tan */
   197  	ARM_ASN		= 11,			/* asin */
   198  	ARM_ACS		= 12,			/* acos */
   199  	ARM_ATN		= 13			/* atan */
   200  
   201  } ARM_FUNARY;
   202  
   203  /*
   204   * Floating-point binary operators.
   205   */
   206  typedef enum
   207  {
   208  	ARM_ADF		= 0,			/* Add */
   209  	ARM_MUF		= 1,			/* Multiply */
   210  	ARM_SUF		= 2,			/* Subtract */
   211  	ARM_RSF		= 3,			/* Reverse subtract */
   212  	ARM_DVF		= 4,			/* Divide */
   213  	ARM_RDF		= 5,			/* Reverse divide */
   214  	ARM_POW		= 6,			/* pow */
   215  	ARM_RPW		= 7,			/* Reverse pow */
   216  	ARM_RMF		= 8,			/* Remainder */
   217  	ARM_FML		= 9,			/* Fast multiply (32-bit only) */
   218  	ARM_FDV		= 10,			/* Fast divide (32-bit only) */
   219  	ARM_FRD		= 11,			/* Fast reverse divide (32-bit only) */
   220  	ARM_POL		= 12			/* Polar angle */
   221  
   222  } ARM_FBINARY;
   223  
   224  #endif /* JIT_ARM_HAS_FPA */
   225  
   226  #ifdef JIT_ARM_HAS_VFP
   227  /* Floating point definitions for the Vector Floating Point architecture */
   228  
   229  /*
   230   * Floating-point unary operators.
   231   */
   232  typedef enum
   233  {
   234  	ARM_MVF		= 0,			/* Move - FCPY */
   235  	ARM_MNF		= 1,			/* Move negative - FNEG */
   236  	ARM_ABS		= 2			/* Absolute value - FABS */
   237  } ARM_FUNARY;
   238  
   239  /*
   240   * Floating-point binary operators.
   241   */
   242  typedef enum
   243  {
   244  	ARM_FADD	= 0,			/* Add */
   245  	ARM_FMUL	= 1,			/* Multiply */
   246  	ARM_FSUB	= 2,			/* Subtract */
   247  	ARM_FDIV	= 4			/* Divide */
   248  } ARM_FBINARY;
   249  
   250  #endif /* JIT_ARM_HAS_VFP */
   251  
   252  /*
   253   * Number of registers that are used for parameters (r0-r3).
   254   */
   255  #define	ARM_NUM_PARAM_REGS	4
   256  
   257  /*
   258   * Type that keeps track of the instruction buffer.
   259   */
   260  typedef unsigned int arm_inst_word;
   261  typedef struct
   262  {
   263  	arm_inst_word *current;
   264  	arm_inst_word *limit;
   265  
   266  } arm_inst_buf;
   267  #define	arm_inst_get_posn(inst)		((inst).current)
   268  #define	arm_inst_get_limit(inst)	((inst).limit)
   269  
   270  /*
   271   * Build an instruction prefix from a condition code and a mask value.
   272   */
   273  #define	arm_build_prefix(cond,mask)	\
   274  			((((unsigned int)(cond)) << 28) | ((unsigned int)(mask)))
   275  
   276  /*
   277   * Build an "always" instruction prefix for a regular instruction.
   278   */
   279  #define arm_prefix(mask)	(arm_build_prefix(ARM_CC_AL, (mask)))
   280  
   281  /*
   282   * Build special "always" prefixes.
   283   */
   284  #define	arm_always			(arm_build_prefix(ARM_CC_AL, 0))
   285  #define	arm_always_cc		(arm_build_prefix(ARM_CC_AL, (1 << 20)))
   286  #define	arm_always_imm		(arm_build_prefix(ARM_CC_AL, (1 << 25)))
   287  
   288  /*
   289   * Wrappers for "arm_always*" that allow higher-level routines
   290   * to change code generation to be based on a condition.  This is
   291   * used to perform branch elimination.
   292   */
   293  #ifndef arm_execute
   294  #define	arm_execute			arm_always
   295  #define	arm_execute_cc		arm_always_cc
   296  #define	arm_execute_imm		arm_always_imm
   297  #endif
   298  
   299  /*
   300   * Initialize an instruction buffer.
   301   */
   302  #define	arm_inst_buf_init(inst,start,end)	\
   303  			do { \
   304  				(inst).current = (arm_inst_word *)(start); \
   305  				(inst).limit = (arm_inst_word *)(end); \
   306  			} while (0)
   307  
   308  /*
   309   * Add an instruction to an instruction buffer.
   310   */
   311  #define	arm_inst_add(inst,value)	\
   312  			do { \
   313  				if((inst).current < (inst).limit) \
   314  				{ \
   315  					*((inst).current)++ = (value); \
   316  				} \
   317  			} while (0)
   318  
   319  /*
   320   * Arithmetic or logical operation which doesn't set condition codes.
   321   */
   322  #define	arm_alu_reg_reg(inst,opc,dreg,sreg1,sreg2)	\
   323  			do { \
   324  				arm_inst_add((inst), arm_execute | \
   325  							(((unsigned int)(opc)) << 21) | \
   326  							(((unsigned int)(dreg)) << 12) | \
   327  							(((unsigned int)(sreg1)) << 16) | \
   328  							 ((unsigned int)(sreg2))); \
   329  			} while (0)
   330  #define	arm_alu_reg_imm8(inst,opc,dreg,sreg,imm)	\
   331  			do { \
   332  				arm_inst_add((inst), arm_execute_imm | \
   333  							(((unsigned int)(opc)) << 21) | \
   334  							(((unsigned int)(dreg)) << 12) | \
   335  							(((unsigned int)(sreg)) << 16) | \
   336  							 ((unsigned int)((imm) & 0xFF))); \
   337  			} while (0)
   338  #define	arm_alu_reg_imm8_cond(inst,opc,dreg,sreg,imm,cond)	\
   339  			do { \
   340  				arm_inst_add((inst), arm_build_prefix((cond), (1 << 25)) | \
   341  							(((unsigned int)(opc)) << 21) | \
   342  							(((unsigned int)(dreg)) << 12) | \
   343  							(((unsigned int)(sreg)) << 16) | \
   344  							 ((unsigned int)((imm) & 0xFF))); \
   345  			} while (0)
   346  #define	arm_alu_reg_imm8_rotate(inst,opc,dreg,sreg,imm,rotate)	\
   347  			do { \
   348  				arm_inst_add((inst), arm_execute_imm | \
   349  							(((unsigned int)(opc)) << 21) | \
   350  							(((unsigned int)(dreg)) << 12) | \
   351  							(((unsigned int)(sreg)) << 16) | \
   352  							(((unsigned int)(rotate)) << 8) | \
   353  							 ((unsigned int)((imm) & 0xFF))); \
   354  			} while (0)
/*
 * Out-of-line helper that arm_alu_reg_imm and arm_alu_reg_imm_save_work
 * call when the immediate lies outside 0..255.  Those macros pass 0 and 1
 * respectively for "saveWork" and arm_execute for "execute_prefix".
 * The definition is not part of this header.
 */
extern void _arm_alu_reg_imm
		(arm_inst_buf *inst, int opc, int dreg,
		 int sreg, int imm, int saveWork, int execute_prefix);
   358  #define	arm_alu_reg_imm(inst,opc,dreg,sreg,imm)	\
   359  			do { \
   360  				int __alu_imm = (int)(imm); \
   361  				if(__alu_imm >= 0 && __alu_imm < 256) \
   362  				{ \
   363  					arm_alu_reg_imm8 \
   364  						((inst), (opc), (dreg), (sreg), __alu_imm); \
   365  				} \
   366  				else \
   367  				{ \
   368  					_arm_alu_reg_imm \
   369  						(&(inst), (opc), (dreg), (sreg), __alu_imm, 0, \
   370  						 arm_execute); \
   371  				} \
   372  			} while (0)
   373  #define	arm_alu_reg_imm_save_work(inst,opc,dreg,sreg,imm)	\
   374  			do { \
   375  				int __alu_imm_save = (int)(imm); \
   376  				if(__alu_imm_save >= 0 && __alu_imm_save < 256) \
   377  				{ \
   378  					arm_alu_reg_imm8 \
   379  						((inst), (opc), (dreg), (sreg), __alu_imm_save); \
   380  				} \
   381  				else \
   382  				{ \
   383  					_arm_alu_reg_imm \
   384  						(&(inst), (opc), (dreg), (sreg), __alu_imm_save, 1, \
   385  						 arm_execute); \
   386  				} \
   387  			} while (0)
   388  #define arm_alu_reg(inst,opc,dreg,sreg)	\
   389  			do { \
   390  				arm_inst_add((inst), arm_execute | \
   391  							(((unsigned int)(opc)) << 21) | \
   392  							(((unsigned int)(dreg)) << 12) | \
   393  							 ((unsigned int)(sreg))); \
   394  			} while (0)
   395  #define arm_alu_reg_cond(inst,opc,dreg,sreg,cond)	\
   396  			do { \
   397  				arm_inst_add((inst), arm_build_prefix((cond), 0) | \
   398  							(((unsigned int)(opc)) << 21) | \
   399  							(((unsigned int)(dreg)) << 12) | \
   400  							 ((unsigned int)(sreg))); \
   401  			} while (0)
   402  
   403  /*
   404   * Arithmetic or logical operation which sets condition codes.
   405   */
   406  #define	arm_alu_cc_reg_reg(inst,opc,dreg,sreg1,sreg2)	\
   407  			do { \
   408  				arm_inst_add((inst), arm_execute_cc | \
   409  							(((unsigned int)(opc)) << 21) | \
   410  							(((unsigned int)(dreg)) << 12) | \
   411  							(((unsigned int)(sreg1)) << 16) | \
   412  							 ((unsigned int)(sreg2))); \
   413  			} while (0)
   414  #define	arm_alu_cc_reg_imm8(inst,opc,dreg,sreg,imm)	\
   415  			do { \
   416  				arm_inst_add((inst), arm_execute_imm | arm_execute_cc | \
   417  							(((unsigned int)(opc)) << 21) | \
   418  							(((unsigned int)(dreg)) << 12) | \
   419  							(((unsigned int)(sreg)) << 16) | \
   420  							 ((unsigned int)((imm) & 0xFF))); \
   421  			} while (0)
   422  #define arm_alu_cc_reg(inst,opc,dreg,sreg)	\
   423  			do { \
   424  				arm_inst_add((inst), arm_execute_cc | \
   425  							(((unsigned int)(opc)) << 21) | \
   426  							(((unsigned int)(dreg)) << 12) | \
   427  							 ((unsigned int)(sreg))); \
   428  			} while (0)
   429  
   430  /*
   431   * Test operation, which sets the condition codes but has no other result.
   432   */
   433  #define arm_test_reg_reg(inst,opc,sreg1,sreg2)	\
   434  			do { \
   435  				arm_alu_cc_reg_reg((inst), (opc), 0, (sreg1), (sreg2)); \
   436  			} while (0)
   437  #define arm_test_reg_imm8(inst,opc,sreg,imm)	\
   438  			do { \
   439  				arm_alu_cc_reg_imm8((inst), (opc), 0, (sreg), (imm)); \
   440  			} while (0)
   441  #define arm_test_reg_imm(inst,opc,sreg,imm)	\
   442  			do { \
   443  				int __test_imm = (int)(imm); \
   444  				if(__test_imm >= 0 && __test_imm < 256) \
   445  				{ \
   446  					arm_alu_cc_reg_imm8((inst), (opc), 0, (sreg), __test_imm); \
   447  				} \
   448  				else \
   449  				{ \
   450  					arm_mov_reg_imm((inst), ARM_WORK, __test_imm); \
   451  					arm_test_reg_reg((inst), (opc), (sreg), ARM_WORK); \
   452  				} \
   453  			} while (0)
   454  
   455  #define arm_test_reg_membase(inst,opc,reg,basereg,disp,scratchreg)	\
   456  			do {	\
   457  				assert(reg!=scratchreg);	\
   458  				assert(basereg!=scratchreg);	\
   459  				arm_load_membase((inst), (tmpreg),(basereg),(disp));	\
   460  				arm_test_reg_reg((inst), (opc), (reg), (tmpreg));	\
   461  } while (0)
   462  
   463  /*
   464   * Move a value between registers.
   465   */
   466  #define	arm_mov_reg_reg(inst,dreg,sreg)	\
   467  			do { \
   468  				arm_alu_reg((inst), ARM_MOV, (dreg), (sreg)); \
   469  			} while (0)
   470  			
   471  /**
   472   * Move a value between floating point registers.
   473   * @var inst is the pointer to the location of memory at which the instruction will be put
   474   * @var dreg is the destination register
   475   * @var freg is the source register
   476   */
   477  #define	arm_mov_freg_freg(inst,dreg,sreg)	\
   478  			do { \
   479  				arm_alu_freg((inst), ARM_MVF, (dreg), (sreg)); \
   480  			} while (0)
   481  
   482  /*
   483   * Move an immediate value into a register.  This is hard because
   484   * ARM lacks an instruction to load a 32-bit immediate value directly.
   485   * We handle the simple cases and then bail out to a function for the rest.
   486   */
   487  #define	arm_mov_reg_imm8(inst,reg,imm)	\
   488  			do { \
   489  				arm_alu_reg_imm8((inst), ARM_MOV, (reg), 0, (imm)); \
   490  			} while (0)
   491  #define	arm_mov_reg_imm8_rotate(inst,reg,imm,rotate)	\
   492  			do { \
   493  				arm_alu_reg_imm8_rotate((inst), ARM_MOV, (reg), \
   494  										0, (imm), (rotate)); \
   495  			} while (0)
/*
 * Out-of-line helper that arm_mov_reg_imm calls for the immediates it
 * does not encode inline; that macro passes arm_execute for
 * "execute_prefix".  The definition is not part of this header.
 */
extern void _arm_mov_reg_imm
	(arm_inst_buf *inst, int reg, int value, int execute_prefix);
/*
 * NOTE(review): presumably reports whether "value" is too complex to be
 * loaded with a single instruction -- confirm against the definition,
 * which is not part of this header.
 */
extern int arm_is_complex_imm(int value);
   499  
   500  /**
   501   * Moves the immediate value imm into register reg.
   502   *
   503   * In case imm is > 255, it builds the value one byte at a time, by calling _arm_mov_reg_imm
   504   * This is done by using a big number of instruction.
   505   * In that case, using mov_reg_imm (defined in jit-rules-arm.c is probably a better idea, when possible
   506   */
   507  #define	arm_mov_reg_imm(inst,reg,imm)	\
   508  			do { \
   509  				int __imm = (int)(imm); \
   510  				if(__imm >= 0 && __imm < 256) \
   511  				{ \
   512  					arm_mov_reg_imm8((inst), (reg), __imm); \
   513  				} \
   514  				else if((reg) == ARM_PC) \
   515  				{ \
   516  					_arm_mov_reg_imm \
   517  						(&(inst), ARM_WORK, __imm, arm_execute); \
   518  					arm_mov_reg_reg((inst), ARM_PC, ARM_WORK); \
   519  				} \
   520  				else if(__imm > -256 && __imm < 0) \
   521  				{ \
   522  					arm_mov_reg_imm8((inst), (reg), ~(__imm)); \
   523  					arm_alu_reg((inst), ARM_MVN, (reg), (reg)); \
   524  				} \
   525  				else \
   526  				{ \
   527  					_arm_mov_reg_imm(&(inst), (reg), __imm, arm_execute); \
   528  				} \
   529  			} while (0)
   530  
   531  #define ARM_NOBASEREG (-1)
   532  
   533  /**
   534   * LDR (Load Register), LDRB (Load Register Byte)
   535   * Load the content of the memory area of size "size" at position basereg+disp+(indexreg<<shift) into the 32-bit "reg", with zero-extension.
   536   * "scratchreg" is a scratch register that has to be asked to the register allocator; it is 
   537   * used only when disp!=0; if disp==0, it can have whatever value, since it won't be used
   538   */
   539  #define arm_mov_reg_memindex(inst,reg,basereg,disp,indexreg,shift,size,scratchreg)	\
   540  do {	\
   541  	if (basereg==ARM_NOBASEREG)	\
   542  	{	\
   543  		fprintf(stderr, "TODO(NOBASEREG) at %s, %d\n", __FILE__, (int)__LINE__);	\
   544  	}	\
   545  	else	\
   546  	{	\
   547  		/* Add the displacement (only if needed)*/\
   548  		int tempreg=(basereg);	\
   549  		if (disp!=0)	\
   550  		{	\
   551  			tempreg=(scratchreg);	\
   552  			assert(tempreg!=basereg);	\
   553  			assert(tempreg!=indexreg);	\
   554  			arm_alu_reg_imm((inst), ARM_ADD, (tempreg), (basereg), (disp)); \
   555  		}	\
   556  		/* Load the content, depending on its size */	\
   557  		switch ((size)) {	\
   558  			case 1: arm_load_memindex_either(inst,reg,(tempreg),indexreg,shift,0x00400000); break;	\
   559  			case 2: arm_load_memindex_either(inst,reg,tempreg,indexreg,shift,0);	\
   560  				arm_shift_reg_imm8((inst), ARM_SHL, (reg), (reg), 16);	\
   561  				arm_shift_reg_imm8((inst), ARM_SHR, (reg), (reg), 16);	\
   562  				break; \
   563  			case 4: arm_load_memindex_either(inst,reg,tempreg,indexreg,shift,0); break;	\
   564  			default: assert (0);	\
   565  		}	\
   566  	}	\
   567  } while (0)
   568  
   569  /**
   570   * Store the content of "reg" into a memory area of size "size" at position basereg+disp+(indexreg<<shift)
   571   * NB: the scratch register has to be asked to the register allocator.
   572   *     It can't be ARM_WORK, since it's already used
   573   */
   574  #define arm_mov_memindex_reg(inst,basereg,disp,indexreg,shift,reg,size,scratchreg)	\
   575  do {	\
   576  	if (basereg==ARM_NOBASEREG)	\
   577  	{	\
   578  		fprintf(stderr, "TODO(NOBASEREG) at %s, %d\n", __FILE__, (int)__LINE__);	\
   579  	}	\
   580  	else	\
   581  	{	\
   582  		arm_shift_reg_imm8((inst), ARM_SHL, (ARM_WORK), (indexreg), (shift));	\
   583  		arm_alu_reg_reg((inst), ARM_ADD, (scratchreg), (basereg), ARM_WORK);	\
   584  		arm_mov_membase_reg((inst),(scratchreg),(disp),(reg),(size))	\
   585  	}	\
   586  } while (0);
   587  
   588  /*
   589   * Stores the content of register "reg" in memory, at position "mem" with size "size"
   590   * NB: destroys the content of ARM_WORK
   591   */
   592  #define arm_mov_mem_reg(inst,mem,reg,size)	\
   593  do {	\
   594  	arm_mov_reg_imm((inst), ARM_WORK, (mem));	\
   595  	switch ((size)) {	\
   596  		case 1: arm_store_membase_byte((inst), (reg), ARM_WORK, 0); break;	\
   597  		case 2: arm_store_membase_short((inst), (reg), ARM_WORK, 0); break;	\
   598  		case 4: arm_store_membase((inst), (reg), ARM_WORK, 0); break;	\
   599  		default: jit_assert(0);	\
   600  	}	\
   601  } while (0)
   602  
   603  /*
   604   * Stores the content of "imm" in memory, at position "mem" with size "size". Uses a scratch register (scratchreg),
   605   * that has to be asked to the register allocator via the [scratch reg] parameter in the definition of the OPCODE.
   606   * NB: destroys the content of ARM_WORK
   607   */
   608  #define arm_mov_mem_imm(inst,mem,imm,size,scratchreg)	\
   609  do {	\
   610  	arm_mov_reg_imm((inst), (scratchreg), (imm));	\
   611  	arm_mov_reg_imm((inst), ARM_WORK, (mem));	\
   612  	switch ((size)) {	\
   613  		case 1: arm_store_membase_byte((inst), (scratchreg), ARM_WORK, 0); break;	\
   614  		case 2: arm_store_membase_short((inst), (scratchreg), ARM_WORK, 0); break;	\
   615  		case 4: arm_store_membase((inst), (scratchreg), ARM_WORK, 0); break;	\
   616  		default: assert(0);	\
   617  	}	\
   618  } while (0)
   619  
   620  /**
   621   * Set "size" bytes at position basereg+disp at the value of imm
   622   * NB: destroys the content of scratchreg. A good choice for scratchreg is ARM_WORK,
   623   * unless the value of disp is too big to be handled by arm_store_membase_either. In that case,
   624   * it's better to require the allocation of a scratch reg by adding the parameter [scratch reg] at the end
   625   * of the parameters of the rule inside jit-rules-arm.ins that's calling this function.
   626   */
   627  #define arm_mov_membase_imm(inst,basereg,disp,imm,size,scratchreg)	\
   628  do {	\
   629  	arm_mov_reg_imm((inst), (scratchreg), imm);	\
   630  	arm_mov_membase_reg((inst), (basereg), (disp), (scratchreg), (size));	\
   631  } while(0);
   632  
   633  /**
   634   * Set "size" bytes at position basereg+disp at the value of reg
   635   * NB: might destroy the content of ARM_WORK because of arm_store_membase
   636   */
   637  #define arm_mov_membase_reg(inst,basereg,disp,reg,size)	\
   638  do {	\
   639  	switch ((size)) {	\
   640  		case 1: arm_store_membase_byte((inst), (reg), (basereg), (disp)); break;	\
   641  		case 2: arm_store_membase_short((inst), (reg), (basereg), (disp)); break;	\
   642  		case 4: arm_store_membase((inst), (reg), (basereg), (disp)); break;	\
   643  		default: jit_assert(0);	\
   644  	}	\
   645  } while(0);
   646  
   647  /**
   648  * Set the value of "reg" to the "size"-bytes-long value held in memory at position basereg+disp
   649  * NB: can destroys the content of ARM_WORK because of arm_store_membase_short
   650  */
   651  #define arm_mov_reg_membase(inst,reg,basereg,disp,size)	\
   652  do {	\
   653  	switch ((size)) {	\
   654  		case 1: arm_load_membase_byte((inst), (reg), (basereg), (disp)); break;	\
   655  		case 2: arm_load_membase_short((inst), (reg), (basereg), (disp)); break;	\
   656  		case 4: arm_load_membase((inst), (reg), (basereg), (disp)); break;	\
   657  		default: jit_assert(0);	\
   658  }	\
   659  } while(0);
   660  
   661  /*
   662   * Clear a register to zero.
   663   */
   664  #define	arm_clear_reg(inst,reg)	\
   665  			do { \
   666  				arm_mov_reg_imm8((inst), (reg), 0); \
   667  			} while (0)
   668  
   669  /*
   670   * No-operation instruction.
   671   */
   672  #define	arm_nop(inst)	arm_mov_reg_reg((inst), ARM_R0, ARM_R0)
   673  
   674  /*
   675   * Perform a shift operation.
   676   */
   677  #define	arm_shift_reg_reg(inst,opc,dreg,sreg1,sreg2) \
   678  			do { \
   679  				arm_inst_add((inst), arm_execute | \
   680  							(((unsigned int)ARM_MOV) << 21) | \
   681  							(((unsigned int)(dreg)) << 12) | \
   682  							(((unsigned int)(sreg2)) << 8) | \
   683  							(((unsigned int)(opc)) << 5) | \
   684  							 ((unsigned int)(1 << 4)) | \
   685  							 ((unsigned int)(sreg1))); \
   686  			} while (0)
   687  #define	arm_shift_reg_imm8(inst,opc,dreg,sreg,imm) \
   688  			do { \
   689  				arm_inst_add((inst), arm_execute | \
   690  							(((unsigned int)ARM_MOV) << 21) | \
   691  							(((unsigned int)(dreg)) << 12) | \
   692  							(((unsigned int)(opc)) << 5) | \
   693  							(((unsigned int)(imm)) << 7) | \
   694  							 ((unsigned int)(sreg))); \
   695  			} while (0)
   696  
   697  /*
   698   * Perform a multiplication instruction.  Note: ARM instruction rules
   699   * say that dreg should not be the same as sreg2, so we swap the order
   700   * of the arguments if that situation occurs.  We assume that sreg1
   701   * and sreg2 are distinct registers.
   702   */
   703  #define arm_mul_reg_reg(inst,dreg,sreg1,sreg2)	\
   704  			do { \
   705  				if((dreg) != (sreg2)) \
   706  				{ \
   707  					arm_inst_add((inst), arm_prefix(0x00000090) | \
   708  								(((unsigned int)(dreg)) << 16) | \
   709  								(((unsigned int)(sreg1)) << 8) | \
   710  								 ((unsigned int)(sreg2))); \
   711  				} \
   712  				else \
   713  				{ \
   714  					arm_inst_add((inst), arm_prefix(0x00000090) | \
   715  								(((unsigned int)(dreg)) << 16) | \
   716  								(((unsigned int)(sreg2)) << 8) | \
   717  								 ((unsigned int)(sreg1))); \
   718  				} \
   719  			} while (0)
   720  
   721  #ifdef JIT_ARM_HAS_FPA
   722  /*
   723   * Perform a binary operation on floating-point arguments.
   724   */
   725  #define	arm_alu_freg_freg(inst,opc,dreg,sreg1,sreg2)	\
   726  			do { \
   727  				arm_inst_add((inst), arm_prefix(0x0E000180) | \
   728  							(((unsigned int)(opc)) << 20) | \
   729  							(((unsigned int)(dreg)) << 12) | \
   730  							(((unsigned int)(sreg1)) << 16) | \
   731  							 ((unsigned int)(sreg2))); \
   732  			} while (0)
   733  #define	arm_alu_freg_freg_32(inst,opc,dreg,sreg1,sreg2)	\
   734  			do { \
   735  				arm_inst_add((inst), arm_prefix(0x0E000100) | \
   736  							(((unsigned int)(opc)) << 20) | \
   737  							(((unsigned int)(dreg)) << 12) | \
   738  							(((unsigned int)(sreg1)) << 16) | \
   739  							 ((unsigned int)(sreg2))); \
   740  			} while (0)
   741  
   742  /*
   743   * Perform a unary operation on floating-point arguments.
   744   */
   745  #define	arm_alu_freg(inst,opc,dreg,sreg)	\
   746  			do { \
   747  				arm_inst_add((inst), arm_prefix(0x0E008180) | \
   748  							(((unsigned int)(opc)) << 20) | \
   749  							(((unsigned int)(dreg)) << 12) | \
   750  							 ((unsigned int)(sreg))); \
   751  			} while (0)
   752  #define	arm_alu_freg_32(inst,opc,dreg,sreg)	\
   753  			do { \
   754  				arm_inst_add((inst), arm_prefix(0x0E008100) | \
   755  							(((unsigned int)(opc)) << 20) | \
   756  							(((unsigned int)(dreg)) << 12) | \
   757  							 ((unsigned int)(sreg))); \
   758  			} while (0)
   759  
   760  
   761  #endif /* JIT_ARM_HAS_FPA */
   762  
   763  #ifdef JIT_ARM_HAS_VFP
   764  /**
   765   * Perform a binary operation on double-precision floating-point arguments.
   766   * OPC is the number indicating the operation to execute (taken from enum ARM_FBINARY)
   767   * sreg1 and sreg2 are the registers containing the first and second operand
   768   * dreg is the destination register
   769   */
   770  #define	arm_alu_freg_freg(inst,opc,dreg,sreg1,sreg2)	\
   771  			do { \
   772  				unsigned int mask;	\
   773  				switch(opc)	\
   774  				{	\
   775  					case ARM_FADD:	\
   776  						mask=0x0E300B00;	\
   777  						break;	\
   778  					case ARM_FMUL:	\
   779  						mask=0x0E200B00;	\
   780  						break;	\
   781  					case ARM_FSUB:	\
   782  						mask=0x0E300B40;	\
   783  						break;	\
   784  					case ARM_FDIV:	\
   785  						mask=0x0E800B00;	\
   786  						break;	\
   787  					default:	\
   788  						printf("Unimplemented binary operation %d in %s\n", opc,  __FILE__);	\
   789  						abort();	\
   790  				}	\
   791  				arm_inst_add((inst), arm_prefix(mask) | \
   792  							(((unsigned int)(dreg)) << 12) | \
   793  							(((unsigned int)(sreg1)) << 16) | \
   794  							((unsigned int)(sreg2))); \
   795  			} while (0)
   796  /**
   797   * Perform a binary operation on single-precision floating-point arguments.
   798   * OPC is the number indicating the operation to execute (taken from enum ARM_FBINARY)
   799   * sreg1 and sreg2 are the registers containing the first and second operand
   800   * dreg is the destination register
   801   */
   802  #define	arm_alu_freg_freg_32(inst,opc,dreg,sreg1,sreg2)	\
   803  do { \
   804  	unsigned int mask;	\
   805  	switch(opc)	\
   806  	{	\
   807  		case ARM_FADD:	\
   808  			mask=0x0E300A00;	\
   809  			break;	\
   810  		case ARM_FMUL:	\
   811  			mask=0x0E200A00;	\
   812  			break;	\
   813  		case ARM_FSUB:	\
   814  			mask=0x0E300A40;	\
   815  			break;	\
   816  		case ARM_FDIV:	\
   817  			mask=0x0E800A00;	\
   818  			break;	\
   819  		default:	\
   820  			printf("Unimplemented binary operation %d in %s\n", opc,  __FILE__);	\
   821  			abort();	\
   822  	}	\
   823  	unsigned int dreg_top_4_bits = (dreg & 0x1E) >> 1;	\
   824  	unsigned int dreg_bottom_bit = (dreg & 0x01);	\
   825  	unsigned int sreg1_top_4_bits = (sreg1 & 0x1E) >> 1;	\
   826  	unsigned int sreg1_bottom_bit = (sreg1 & 0x01);	\
   827  	unsigned int sreg2_top_4_bits = (sreg2 & 0x1E) >> 1;	\
   828  	unsigned int sreg2_bottom_bit = (sreg2 & 0x01);	\
   829  	arm_inst_add((inst), arm_prefix(mask) | \
   830  				(((unsigned int)(dreg_top_4_bits)) << 12) |	\
   831  				(((unsigned int)(dreg_bottom_bit)) << 22) |	\
   832  				(((unsigned int)(sreg1_top_4_bits)) << 16) |	\
   833  				(((unsigned int)(sreg1_bottom_bit)) << 7) |	\
   834  				(((unsigned int)(sreg2_bottom_bit)) << 5) |	\
   835  				((unsigned int)(sreg2_top_4_bits))); \
   836  } while (0)
   837  
   838  /**
   839  * Perform a unary operation on a double-precision floating-point argument.
   840  * OPC is the number indicating the operation to execute (taken from enum ARM_FUNARY)
   841  * sreg is the register containing the operand
   842  * dreg is the destination register
   843  */
   844  #define	arm_alu_freg(inst,opc,dreg,sreg)	\
   845  			do { \
   846  				unsigned int mask;	\
   847  				switch(opc)	\
   848  				{	\
   849  					case ARM_MVF:	\
   850  						mask=0xEB00B40;	\
   851  						break;	\
   852  					case ARM_MNF:	\
   853  						mask=0xEB10B40;	\
   854  						break;	\
   855  					case ARM_ABS:	\
   856  						mask=0xEB00BC0;	\
   857  						break;	\
   858  					default:	\
   859  						printf("Unimplemented unary operation %d in %s\n", opc,  __FILE__);	\
   860  						abort();	\
   861  				}	\
   862  				arm_inst_add((inst), arm_prefix(mask) | \
   863  							(((unsigned int)(dreg)) << 12) |	\
   864  							 ((unsigned int)(sreg))); \
   865  			} while (0)
   866  			
   867  /**
   868   * Perform a unary operation on a single-precision floating-point argument.
   869   * OPC is the number indicating the operation to execute (taken from enum ARM_FUNARY)
   870   * sreg is the register containing the operand
   871   * dreg is the destination register
   872   */			
   873  #define	arm_alu_freg_32(inst,opc,dreg,sreg)	\
   874  			do { \
   875  				unsigned int mask;	\
   876  				switch(opc)	\
   877  				{	\
   878  					case ARM_MVF:	\
   879  						mask=0xEB00A40;	\
   880  						break;	\
   881  					case ARM_MNF:	\
   882  						mask=0xEB10A40;	\
   883  						break;	\
   884  					case ARM_ABS:	\
   885  						mask=0xEB00AC0;	\
   886  						break;	\
   887  					default:	\
   888  						printf("Unimplemented OPCODE in %s\n", __FILE__);	\
   889  						abort();	\
   890  				}	\
   891  				unsigned int dreg_top_4_bits = (dreg & 0x1E) >> 1;	\
   892  				unsigned int dreg_bottom_bit = (dreg & 0x01);	\
   893  				unsigned int sreg_top_4_bits = (sreg & 0x1E) >> 1;	\
   894  				unsigned int sreg_bottom_bit = (sreg & 0x01);	\
   895  				arm_inst_add((inst), arm_prefix(mask) | \
   896  							(((unsigned int)(dreg_top_4_bits)) << 12) |	\
   897  							(((unsigned int)(dreg_bottom_bit)) << 22) |	\
   898  							(((unsigned int)(sreg_bottom_bit)) << 5)  |	\
   899  							 ((unsigned int)(sreg_top_4_bits))); \
   900  			} while (0)
   901  
   902  #endif /* JIT_ARM_HAS_VFP */
   903  /*
   904   * Branch or jump immediate by a byte offset.  The offset is
   905   * assumed to be +/- 32 Mbytes.
   906   */
   907  #define	arm_branch_imm(inst,cond,imm)	\
   908  			do { \
   909  				arm_inst_add((inst), arm_build_prefix((cond), 0x0A000000) | \
   910  							(((unsigned int)(((int)(imm)) >> 2)) & \
   911  								0x00FFFFFF)); \
   912  			} while (0)
   913  #define	arm_jump_imm(inst,imm)	arm_branch_imm((inst), ARM_CC_AL, (imm))
   914  
   915  /*
   916   * Branch or jump to a specific target location.  The offset is
   917   * assumed to be +/- 32 Mbytes.
   918   */
   919  #define	arm_branch(inst,cond,target)	\
   920  			do { \
   921  				int __br_offset = (int)(((unsigned char *)(target)) - \
   922  					           (((unsigned char *)((inst).current)) + 8)); \
   923  				arm_branch_imm((inst), (cond), __br_offset); \
   924  			} while (0)
   925  #define	arm_jump(inst,target)	arm_branch((inst), ARM_CC_AL, (target))
   926  
   927  /*
   928   * Jump to a specific target location that may be greater than
   929   * 32 Mbytes away from the current location.
   930   */
   931  #define	arm_jump_long(inst,target)	\
   932  			do { \
   933  				int __jmp_offset = (int)(((unsigned char *)(target)) - \
   934  					            (((unsigned char *)((inst).current)) + 8)); \
   935  				if(__jmp_offset >= -0x04000000 && __jmp_offset < 0x04000000) \
   936  				{ \
   937  					arm_jump_imm((inst), __jmp_offset); \
   938  				} \
   939  				else \
   940  				{ \
   941  					arm_mov_reg_imm((inst), ARM_PC, (int)(target)); \
   942  				} \
   943  			} while (0)
   944  
   945  /*
   946   * Back-patch a branch instruction.
   947   */
   948  #define	arm_patch(inst,posn,target)	\
   949  			do { \
   950  				int __p_offset = (int)(((unsigned char *)(target)) - \
   951  							          (((unsigned char *)(posn)) + 8)); \
   952  				__p_offset = (__p_offset >> 2) & 0x00FFFFFF; \
   953  				if(((arm_inst_word *)(posn)) < (inst).limit) \
   954  				{ \
   955  					*((int *)(posn)) = (*((int *)(posn)) & 0xFF000000) | \
   956  						__p_offset; \
   957  				} \
   958  			} while (0)
   959  
   960  /*
   961   * Call a subroutine immediate by a byte offset.
   962   */
   963  #define	arm_call_imm(inst,imm)	\
   964  			do { \
   965  				arm_inst_add((inst), arm_prefix(0x0B000000) | \
   966  							(((unsigned int)(((int)(imm)) >> 2)) & \
   967  								0x00FFFFFF)); \
   968  			} while (0)
   969  
   970  /*
   971   * Call a subroutine at a specific target location.
   972   * (Equivalent to x86_call_code)
   973   */
   974  #define	arm_call(inst,target)	\
   975  			do { \
   976  				int __call_offset = (int)(((unsigned char *)(target)) - \
   977  					             (((unsigned char *)((inst).current)) + 8)); \
   978  				if(__call_offset >= -0x04000000 && __call_offset < 0x04000000) \
   979  				{ \
   980  					arm_call_imm((inst), __call_offset); \
   981  				} \
   982  				else \
   983  				{ \
   984  					arm_load_membase((inst), ARM_WORK, ARM_PC, 4); \
   985  					arm_alu_reg_imm8((inst), ARM_ADD, ARM_LINK, ARM_PC, 4); \
   986  					arm_mov_reg_reg((inst), ARM_PC, ARM_WORK); \
   987  					arm_inst_add((inst), (int)(target)); \
   988  				} \
   989  			} while (0)
   990  
   991  /*
   992   * Return from a subroutine, where the return address is in the link register.
   993   */
   994  #define	arm_return(inst)	\
   995  			do { \
   996  				arm_mov_reg_reg((inst), ARM_PC, ARM_LINK); \
   997  			} while (0)
   998  
   999  /*
  1000   * Push a register onto the system stack.
  1001   */
  1002  #define	arm_push_reg(inst,reg)	\
  1003  			do { \
  1004  				arm_inst_add((inst), arm_prefix(0x05200004) | \
  1005  							(((unsigned int)ARM_SP) << 16) | \
  1006  							(((unsigned int)(reg)) << 12)); \
  1007  			} while (0)
  1008  
  1009  /*
  1010   * Pop a register from the system stack.
  1011   */
  1012  #define	arm_pop_reg(inst,reg)	\
  1013  			do { \
  1014  				arm_inst_add((inst), arm_prefix(0x04900004) | \
  1015  							(((unsigned int)ARM_SP) << 16) | \
  1016  							(((unsigned int)(reg)) << 12)); \
  1017  			} while (0)
  1018  
  1019  /*
  1020   * Pop the top of the system stack and put it at a given offset from the position specified by basereg (that is, usually, the frame pointer). NB: This macro thrashes the content of ARM_WORK
  1021   */
  1022  #define arm_pop_membase(inst,basereg,offset)	\
  1023  			do {	\
  1024  				arm_pop_reg((inst), ARM_WORK);	\
  1025  				arm_store_membase((inst),ARM_WORK,basereg,offset);	\
  1026  			} while (0)
  1027  			
  1028  /*
  1029   * Set up a local variable frame, and save the registers in "regset".
  1030   */
  1031  #define	arm_setup_frame(inst,regset)	\
  1032  			do { \
  1033  				arm_mov_reg_reg((inst), ARM_WORK, ARM_SP); \
  1034  				arm_inst_add((inst), arm_prefix(0x0920D800) | \
  1035  							(((unsigned int)ARM_SP) << 16) | \
  1036  							(((unsigned int)(regset)))); \
  1037  				arm_alu_reg_imm8((inst), ARM_SUB, ARM_FP, ARM_WORK, 4); \
  1038  			} while (0)
  1039  
  1040  /*
  1041   * Pop a local variable frame, restore the registers in "regset",
  1042   * and return to the caller.
  1043   */
  1044  #define	arm_pop_frame(inst,regset)	\
  1045  			do { \
  1046  				arm_inst_add((inst), arm_prefix(0x0910A800) | \
  1047  							(((unsigned int)ARM_FP) << 16) | \
  1048  							(((unsigned int)(regset)))); \
  1049  			} while (0)
  1050  
  1051  /*
  1052   * Pop a local variable frame, in preparation for a tail call.
  1053   * This restores "lr" to its original value, but does not set "pc".
  1054   */
  1055  #define	arm_pop_frame_tail(inst,regset)	\
  1056  			do { \
  1057  				arm_inst_add((inst), arm_prefix(0x09106800) | \
  1058  							(((unsigned int)ARM_FP) << 16) | \
  1059  							(((unsigned int)(regset)))); \
  1060  			} while (0)
  1061  
  1062  /*
  1063   * Load a word value from a pointer and then advance the pointer.
  1064   */
  1065  #define	arm_load_advance(inst,dreg,sreg)	\
  1066  			do { \
  1067  				arm_inst_add((inst), arm_prefix(0x04900004) | \
  1068  							(((unsigned int)(sreg)) << 16) | \
  1069  							(((unsigned int)(dreg)) << 12)); \
  1070  			} while (0)
  1071  
  1072  /*
  1073   * Load a value from an address into a register.
  1074   */
  1075  #define arm_load_membase_either(inst,reg,basereg,imm,mask)	\
  1076  			do { \
  1077  				int __mb_offset = (int)(imm); \
  1078  				if(__mb_offset >= 0 && __mb_offset < (1 << 12)) \
  1079  				{ \
  1080  					arm_inst_add((inst), arm_prefix(0x05900000 | (mask)) | \
  1081  								(((unsigned int)(basereg)) << 16) | \
  1082  								(((unsigned int)(reg)) << 12) | \
  1083  								 ((unsigned int)__mb_offset)); \
  1084  				} \
  1085  				else if(__mb_offset > -(1 << 12) && __mb_offset < 0) \
  1086  				{ \
  1087  					arm_inst_add((inst), arm_prefix(0x05100000 | (mask)) | \
  1088  								(((unsigned int)(basereg)) << 16) | \
  1089  								(((unsigned int)(reg)) << 12) | \
  1090  								 ((unsigned int)(-__mb_offset))); \
  1091  				} \
  1092  				else \
  1093  				{ \
  1094  					assert(basereg!=ARM_WORK);	\
  1095  					arm_mov_reg_imm((inst), ARM_WORK, __mb_offset); \
  1096  					arm_inst_add((inst), arm_prefix(0x07900000 | (mask)) | \
  1097  								(((unsigned int)(basereg)) << 16) | \
  1098  								(((unsigned int)(reg)) << 12) | \
  1099  								 ((unsigned int)ARM_WORK)); \
  1100  				} \
  1101  			} while (0)
  1102  #define	arm_load_membase(inst,reg,basereg,imm)	\
  1103  			do { \
  1104  				arm_load_membase_either((inst), (reg), (basereg), (imm), 0); \
  1105  			} while (0)
  1106  
  1107  /**
  1108   * Moves the content of 1 byte (is_half==0) or 2 bytes (is_half==1) from memory address basereg+disp+(indexreg<<shift) into dreg, with sign extension (is_signed==1) or zero extension (is_signed==0)
  1109   */
  1110  #define arm_widen_memindex(inst,dreg,basereg,disp,indexreg,shift,is_signed,is_half)	\
  1111  do {	\
  1112  	int scratchreg=ARM_WORK;	\
  1113  	if(is_half)	\
  1114  	{	\
  1115  		arm_mov_reg_memindex((inst),(dreg),(basereg),(disp),(indexreg),(shift),2, scratchreg);	\
  1116  	}	\
  1117  	else	\
  1118  	{	\
  1119  		arm_mov_reg_memindex((inst),(dreg),(basereg),(disp),(indexreg),(shift),1, scratchreg);	\
  1120  	}	\
  1121  	if(is_signed)	\
  1122  	{	\
  1123  		int shiftSize;	\
  1124  		if (is_half)	\
  1125  		{	\
  1126  			shiftSize=16;	\
  1127  		}	\
  1128  		else	\
  1129  		{	\
  1130  			shiftSize=24;	\
  1131  		}	\
  1132  		arm_shift_reg_imm8((inst), ARM_SHL, (dreg), (dreg), shiftSize); \
  1133  		arm_shift_reg_imm8((inst), ARM_SAR, (dreg), (dreg), shiftSize);	\
  1134  	}	\
  1135  } while (0)
  1136  
  1137  #define	arm_load_membase_byte(inst,reg,basereg,imm)	\
  1138  			do { \
  1139  				arm_load_membase_either((inst), (reg), (basereg), (imm), \
  1140  										0x00400000); \
  1141  			} while (0)
  1142  #define	arm_load_membase_sbyte(inst,reg,basereg,imm)	\
  1143  			do { \
  1144  				arm_load_membase_either((inst), (reg), (basereg), (imm), \
  1145  										0x00400000); \
  1146  				arm_shift_reg_imm8((inst), ARM_SHL, (reg), (reg), 24); \
  1147  				arm_shift_reg_imm8((inst), ARM_SAR, (reg), (reg), 24); \
  1148  			} while (0)
  1149  #define	arm_load_membase_ushort(inst,reg,basereg,imm)	\
  1150  			do { \
  1151  				arm_load_membase_byte((inst), ARM_WORK, (basereg), (imm)); \
  1152  				arm_load_membase_byte((inst), (reg), (basereg), (imm) + 1); \
  1153  				arm_shift_reg_imm8((inst), ARM_SHL, (reg), (reg), 8); \
  1154  				arm_alu_reg_reg((inst), ARM_ORR, (reg), (reg), ARM_WORK); \
  1155  			} while (0)
  1156  #define	arm_load_membase_short(inst,reg,basereg,imm)	\
  1157  			do { \
  1158  				arm_load_membase_byte((inst), ARM_WORK, (basereg), (imm)); \
  1159  				arm_load_membase_byte((inst), (reg), (basereg), (imm) + 1); \
  1160  				arm_shift_reg_imm8((inst), ARM_SHL, (reg), (reg), 24); \
  1161  				arm_shift_reg_imm8((inst), ARM_SAR, (reg), (reg), 16); \
  1162  				arm_alu_reg_reg((inst), ARM_ORR, (reg), (reg), ARM_WORK); \
  1163  			} while (0)
  1164  
  1165  
  1166  #ifdef JIT_ARM_HAS_FPA
  1167  /*
  1168   * Load a floating-point value from an address into a register.
  1169   */
  1170  #define	arm_load_membase_float(inst,reg,basereg,imm,mask)	\
  1171  			do { \
  1172  				int __mb_offset = (int)(imm); \
  1173  				if(__mb_offset >= 0 && __mb_offset < (1 << 10) && \
  1174  				   (__mb_offset & 3) == 0) \
  1175  				{ \
  1176  					arm_inst_add((inst), arm_prefix(0x0D900100 | (mask)) | \
  1177  							(((unsigned int)(basereg)) << 16) | \
  1178  							(((unsigned int)(reg)) << 12) | \
  1179  							 ((unsigned int)((__mb_offset / 4) & 0xFF))); \
  1180  				} \
  1181  				else if(__mb_offset > -(1 << 10) && __mb_offset < 0 && \
  1182  				        (__mb_offset & 3) == 0) \
  1183  				{ \
  1184  					arm_inst_add((inst), arm_prefix(0x0D180100 | (mask)) | \
  1185  							(((unsigned int)(basereg)) << 16) | \
  1186  							(((unsigned int)(reg)) << 12) | \
  1187  							 ((unsigned int)(((-__mb_offset) / 4) & 0xFF)));\
  1188  				} \
  1189  				else \
  1190  				{ \
  1191  					arm_mov_reg_imm((inst), ARM_WORK, __mb_offset); \
  1192  					arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, \
  1193  								    (basereg), ARM_WORK); \
  1194  					arm_inst_add((inst), arm_prefix(0x0D900100 | (mask)) | \
  1195  							(((unsigned int)ARM_WORK) << 16) | \
  1196  							(((unsigned int)(reg)) << 12)); \
  1197  				} \
  1198  			} while (0)
  1199  #define	arm_load_membase_float32(inst,reg,basereg,imm)	\
  1200  			do { \
  1201  				arm_load_membase_float((inst), (reg), (basereg), (imm), 0); \
  1202  			} while (0)
  1203  #define	arm_load_membase_float64(inst,reg,basereg,imm)	\
  1204  			do { \
  1205  				arm_load_membase_float((inst), (reg), (basereg), \
  1206  									   (imm), 0x00008000); \
  1207  			} while (0)
  1208  
  1209  #endif /* JIT_ARM_HAS_FPA */
  1210  
  1211  #ifdef JIT_ARM_HAS_VFP
  1212  /**
  1213   * FLDS (Floating-point Load, Single-precision)
  1214   * Loads a word from memory address basereg+imm to 
  1215   * the single precision floating point register reg.
  1216   * "mask" is usually set to 0
  1217   */
  1218  #define	arm_load_membase_float(inst,reg,basereg,imm,mask)	\
  1219  do { \
  1220  	unsigned int reg_top_4_bits = (reg & 0x1E) >> 1;	\
  1221  	unsigned int reg_bottom_bit = (reg & 0x01);	\
  1222  	int __mb_offset = (int)(imm); \
  1223  	if(__mb_offset >= 0 && __mb_offset < (1 << 10) && \
  1224  		(__mb_offset & 3) == 0) \
  1225  	{ \
  1226  		arm_inst_add((inst), arm_prefix(0x0D900A00 | (mask)) | \
  1227  			(((unsigned int)(basereg)) << 16) | \
  1228  			(((unsigned int)(reg_top_4_bits)) << 12) | \
  1229  			(((unsigned int)(reg_bottom_bit)) << 22) | \
  1230  			((unsigned int)((__mb_offset / 4) & 0xFF))); \
  1231  	} \
  1232  	else if(__mb_offset > -(1 << 10) && __mb_offset < 0 && \
  1233  		(__mb_offset & 3) == 0) \
  1234  	{ \
  1235  		arm_inst_add((inst), arm_prefix(0x0D100A00 | (mask)) | \
  1236  			(((unsigned int)(basereg)) << 16) | \
  1237  			(((unsigned int)(reg_top_4_bits)) << 12) | \
  1238  			(((unsigned int)(reg_bottom_bit)) << 22) | \
  1239  			((unsigned int)(((-__mb_offset) / 4) & 0xFF)));\
  1240  	} \
  1241  	else \
  1242  	{ \
  1243  		assert(reg != ARM_WORK);	\
  1244  		assert(basereg!=ARM_WORK);	\
  1245  		if(__mb_offset > 0)	\
  1246  		{	\
  1247  			arm_mov_reg_imm((inst), ARM_WORK, __mb_offset);	\
  1248  			arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, (basereg), ARM_WORK);	\
  1249  		}	\
  1250  		else	\
  1251  		{	\
  1252  			arm_mov_reg_imm((inst), ARM_WORK, -__mb_offset);	\
  1253  			arm_alu_reg_reg((inst), ARM_SUB, ARM_WORK, (basereg), ARM_WORK);	\
  1254  		}	\
  1255  		arm_inst_add((inst), arm_prefix(0x0D900A00 | (mask)) | \
  1256  			(((unsigned int)ARM_WORK) << 16) | \
  1257  			(((unsigned int)(reg_top_4_bits)) << 12) | \
  1258  			(((unsigned int)(reg_bottom_bit)) << 22)); \
  1259  	} \
  1260  } while (0)
  1261  
  1262  /**
  1263   * FLDD
  1264   */
  1265  #define	arm_load_membase_float64(inst,reg,basereg,imm)	\
  1266  			do { \
  1267  				int __mb_offset = (int)(imm); \
  1268  				if(__mb_offset >= 0 && __mb_offset < (1 << 10) && \
  1269  				   (__mb_offset & 3) == 0) \
  1270  				{ \
  1271  					arm_inst_add((inst), arm_prefix(0x0D900B00) | \
  1272  							(((unsigned int)(basereg)) << 16) | \
  1273  							(((unsigned int)(reg)) << 12) | \
  1274  							 ((unsigned int)((__mb_offset / 4) & 0xFF))); \
  1275  				} \
  1276  				else if(__mb_offset > -(1 << 10) && __mb_offset < 0 && \
  1277  				        (__mb_offset & 3) == 0) \
  1278  				{ \
  1279  					arm_inst_add((inst), arm_prefix(0x0D100B00) | \
  1280  							(((unsigned int)(basereg)) << 16) | \
  1281  							(((unsigned int)(reg)) << 12) | \
  1282  							 ((unsigned int)(((-__mb_offset) / 4) & 0xFF)));\
  1283  				} \
  1284  				else \
  1285  				{ \
  1286  					assert(reg != ARM_WORK);	\
  1287  					assert(basereg!=ARM_WORK);	\
  1288  					if(__mb_offset > 0)	\
  1289  					{	\
  1290  						arm_mov_reg_imm((inst), ARM_WORK, __mb_offset);	\
  1291  						arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, (basereg), ARM_WORK);	\
  1292  					}	\
  1293  					else	\
  1294  					{	\
  1295  						arm_mov_reg_imm((inst), ARM_WORK, -__mb_offset);	\
  1296  						arm_alu_reg_reg((inst), ARM_SUB, ARM_WORK, (basereg), ARM_WORK);	\
  1297  					}	\
  1298  					arm_inst_add((inst), arm_prefix(0x0D900B00) | \
  1299  							(((unsigned int)ARM_WORK) << 16) | \
  1300  							(((unsigned int)(reg)) << 12)); \
  1301  				} \
  1302  			} while (0)
  1303  
  1304  #define	arm_load_membase_float32(inst,reg,basereg,imm)	\
  1305  			do { \
  1306  				arm_load_membase_float((inst), (reg), (basereg), (imm), 0); \
  1307  			} while (0)
  1308  
  1309  /**
  1310  * Load the content of the memory area at position basereg+disp into the float register "dfreg",
  1311  * using the appropriate instruction depending whether the value to be loaded is_double (1 => 64 bits) or not (0 => 32 bits)
  1312  * (it's similar to x86_fld_membase)
  1313  */
  1314  #define arm_fld_membase(inst,dfreg,basereg,disp,is_double)	\
  1315  do {	\
  1316  	if (is_double)	\
  1317  	{	\
  1318  		arm_load_membase_float64((inst), (dfreg), (basereg), (disp));	\
  1319  	}	\
  1320  	else	\
  1321  	{	\
  1322  		arm_load_membase_float32((inst), (dfreg), (basereg), (disp));	\
  1323  	}\
  1324  } while(0)
  1325  
  1326  /**
  1327   * Load the content of the memory area at position basereg+disp+(indexreg<<shift) into the float register "dfreg",
  1328   * using the appropriate instruction depending whether the value to be loaded is_double (1 => 64 bits) or not (0 => 32 bits)
  1329   * (it's similar to x86_fld_memindex)
  1330   */
  1331  #define arm_fld_memindex(inst,dfreg,basereg,disp,indexreg,shift,is_double,scratchreg) \
  1332  	do {	\
  1333  		if (is_double)	\
  1334  		{	\
  1335  			arm_load_memindex_float64((inst), (dfreg), (basereg), (disp), (indexreg), (shift), (scratchreg));	\
  1336  		}	\
  1337  		else	\
  1338  		{	\
  1339  			arm_load_memindex_float32((inst), (dfreg), (basereg), (disp), (indexreg), (shift), (scratchreg));	\
  1340  		}\
  1341  	} while(0)
  1342  /**
  1343   * Load the content of the 64-bits memory area at position basereg+disp+(indexreg<<shift) into the double register "dfreg"
  1344   * NB: the scratch register has to be asked to the register allocator.
  1345   *     It can't be ARM_WORK, since it's already used
  1346   */
  1347  #define arm_load_memindex_float64(inst,dfreg,basereg,disp,indexreg,shift,scratchreg)	\
  1348  	do {	\
  1349  		arm_shift_reg_imm8((inst), ARM_SHL, ARM_WORK, (indexreg), (shift));	\
  1350  		arm_alu_reg_reg((inst), ARM_ADD, (scratchreg), (basereg), ARM_WORK);	\
  1351  		arm_load_membase_float64((inst), (dfreg), (scratchreg), (disp));	\
  1352  	} while (0)
  1353  	
  1354  /**
  1355   * Load the content of the 32-bits memory area at position basereg+disp+(indexreg<<shift) into the single float register "dfreg"
  1356   * NB: the scratch register has to be asked to the register allocator.
  1357   *     It can't be ARM_WORK, since it's already used
  1358   */
  1359  #define arm_load_memindex_float32(inst,dfreg,basereg,disp,indexreg,shift,scratchreg)	\
  1360  do {	\
  1361  	arm_shift_reg_imm8((inst), ARM_SHL, ARM_WORK, (indexreg), (shift));	\
  1362  	arm_alu_reg_reg((inst), ARM_ADD, (scratchreg), (basereg), ARM_WORK);	\
  1363  	arm_load_membase_float32((inst), (dfreg), (scratchreg), (disp));	\
  1364  	} while (0)
  1365  
  1366  /**
  1367   * Store the content of the float register "sfreg" into the memory area at position basereg+disp+(indexreg<<shift)
  1368   * using the appropriate instruction depending whether the value to be loaded is_double (1 => 64 bits) or not (0 => 32 bits)
  1369   * (it's similar to x86_fst_memindex)
  1370   */
  1371  #define arm_fst_memindex(inst,sfreg,basereg,disp,indexreg,shift,is_double,scratchreg) \
  1372  do {	\
  1373  	if (is_double)	\
  1374  	{	\
  1375  		arm_store_memindex_float64((inst), (sfreg), (basereg), (disp), (indexreg), (shift), (scratchreg));	\
  1376  	}	\
  1377  	else	\
  1378  	{	\
  1379  		arm_store_memindex_float32((inst), (sfreg), (basereg), (disp), (indexreg), (shift), (scratchreg));	\
  1380  	}\
  1381  } while(0)
  1382  
  1383  /**
  1384   * Store the content of the double float register "dfreg" into the 64-bits memory area at position basereg+disp+(indexreg<<shift)
  1385   * NB: the scratch register has to be asked to the register allocator.
  1386   *     It can't be ARM_WORK, since it's already used
  1387   */
  1388  #define arm_store_memindex_float64(inst,dfreg,basereg,disp,indexreg,shift,scratchreg)	\
  1389  do {	\
  1390  	arm_shift_reg_imm8((inst), ARM_SHL, ARM_WORK, (indexreg), (shift));	\
  1391  	arm_alu_reg_reg((inst), ARM_ADD, (scratchreg), (basereg), ARM_WORK);	\
  1392  	arm_store_membase_float64((inst), (dfreg), (scratchreg), (disp));	\
  1393  } while (0)
  1394  
  1395  /**
  1396   * Store the content of the single float register "dfreg" into the 32-bits memory area at position basereg+disp+(indexreg<<shift)
  1397   * NB: the scratch register has to be asked to the register allocator.
  1398   *     It can't be ARM_WORK, since it's already used
  1399   */
  1400  #define arm_store_memindex_float32(inst,dfreg,basereg,disp,indexreg,shift,scratchreg)	\
  1401  do {	\
  1402  	arm_shift_reg_imm8((inst), ARM_SHL, ARM_WORK, (indexreg), (shift));	\
  1403  	arm_alu_reg_reg((inst), ARM_ADD, (scratchreg), (basereg), ARM_WORK);	\
  1404  	arm_store_membase_float32((inst), (dfreg), (scratchreg), (disp));	\
  1405  } while (0)
  1406  
  1407  #endif /* JIT_ARM_HAS_VFP */
  1408  
  1409  /**
  1410   * Store a value from a register (reg) into an address (basereg+imm).
  1411   *
  1412   */
  1413  #define arm_store_membase_either(inst,reg,basereg,imm,mask)	\
  1414  			do { \
  1415  				int __sm_offset = (int)(imm); \
  1416  				if(__sm_offset >= 0 && __sm_offset < (1 << 12)) \
  1417  				{ \
  1418  					arm_inst_add((inst), arm_prefix(0x05800000 | (mask)) | \
  1419  								(((unsigned int)(basereg)) << 16) | \
  1420  								(((unsigned int)(reg)) << 12) | \
  1421  								 ((unsigned int)__sm_offset)); \
  1422  				} \
  1423  				else if(__sm_offset > -(1 << 12) && __sm_offset < 0) \
  1424  				{ \
  1425  					arm_inst_add((inst), arm_prefix(0x05000000 | (mask)) | \
  1426  								(((unsigned int)(basereg)) << 16) | \
  1427  								(((unsigned int)(reg)) << 12) | \
  1428  								 ((unsigned int)(-__sm_offset))); \
  1429  				} \
  1430  				else \
  1431  				{ \
  1432  					assert(reg != ARM_WORK);	\
  1433  					assert(basereg!=ARM_WORK);	\
  1434  					arm_mov_reg_imm((inst), ARM_WORK, __sm_offset); \
  1435  					arm_inst_add((inst), arm_prefix(0x07800000 | (mask)) | \
  1436  								(((unsigned int)(basereg)) << 16) | \
  1437  								(((unsigned int)(reg)) << 12) | \
  1438  								 ((unsigned int)ARM_WORK)); \
  1439  				} \
  1440  			} while (0)
  1441  
  1442  /*
  1443   * The ARM STR instruction. The content of "reg" will be put in memory at the address given by the content of basereg + imm
  1444   */
  1445  #define	arm_store_membase(inst,reg,basereg,imm)	\
  1446  			do { \
  1447  				arm_store_membase_either((inst), (reg), (basereg), (imm), 0); \
  1448  			} while (0)
  1449  #define	arm_store_membase_byte(inst,reg,basereg,imm)	\
  1450  			do { \
  1451  				arm_store_membase_either((inst), (reg), (basereg), (imm), \
  1452  										 0x00400000); \
  1453  			} while (0)
  1454  #define	arm_store_membase_sbyte(inst,reg,basereg,imm)	\
  1455  			do { \
  1456  				arm_store_membase_byte((inst), (reg), (basereg), (imm)); \
  1457  			} while (0)
  1458  #define	arm_store_membase_short(inst,reg,basereg,imm)	\
  1459  			do { \
  1460  				arm_store_membase_either((inst), (reg), (basereg), (imm), \
  1461  										 0x00400000); \
  1462  				arm_shift_reg_imm8((inst), ARM_SHR, (reg), (reg), 8); \
  1463  				arm_store_membase_either((inst), (reg), (basereg), \
  1464  										 (imm) + 1, 0x00400000); \
  1465  			} while (0)
  1466  #define	arm_store_membase_ushort(inst,reg,basereg,imm)	\
  1467  			do { \
  1468  				arm_store_membase_short((inst), (reg), (basereg), (imm)); \
  1469  			} while (0)
  1470  
  1471  #ifdef JIT_ARM_HAS_FPA
  1472  /*
  1473   * Store a floating-point value to a memory address.
  1474   */
  1475  #define	arm_store_membase_float(inst,reg,basereg,imm,mask)	\
  1476  			do { \
  1477  				int __mb_offset = (int)(imm); \
  1478  				if(__mb_offset >= 0 && __mb_offset < (1 << 10) && \
  1479  				   (__mb_offset & 3) == 0) \
  1480  				{ \
  1481  					arm_inst_add((inst), arm_prefix(0x0D800100 | (mask)) | \
  1482  							(((unsigned int)(basereg)) << 16) | \
  1483  							(((unsigned int)(reg)) << 12) | \
  1484  							 ((unsigned int)((__mb_offset / 4) & 0xFF))); \
  1485  				} \
  1486  				else if(__mb_offset > -(1 << 10) && __mb_offset < 0 && \
  1487  				        (__mb_offset & 3) == 0) \
  1488  				{ \
  1489  					arm_inst_add((inst), arm_prefix(0x0D080100 | (mask)) | \
  1490  							(((unsigned int)(basereg)) << 16) | \
  1491  							(((unsigned int)(reg)) << 12) | \
  1492  							 ((unsigned int)(((-__mb_offset) / 4) & 0xFF)));\
  1493  				} \
  1494  				else \
  1495  				{ \
  1496  					arm_mov_reg_imm((inst), ARM_WORK, __mb_offset); \
  1497  					arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, \
  1498  								    (basereg), ARM_WORK); \
  1499  					arm_inst_add((inst), arm_prefix(0x0D800100 | (mask)) | \
  1500  							(((unsigned int)ARM_WORK) << 16) | \
  1501  							(((unsigned int)(reg)) << 12)); \
  1502  				} \
  1503  			} while (0)
  1504  #define	arm_store_membase_float32(inst,reg,basereg,imm)	\
  1505  			do { \
  1506  				arm_store_membase_float((inst), (reg), (basereg), (imm), 0); \
  1507  			} while (0)
  1508  #define	arm_store_membase_float64(inst,reg,basereg,imm)	\
  1509  			do { \
  1510  				arm_store_membase_float((inst), (reg), (basereg), \
  1511  									    (imm), 0x00008000); \
  1512  			} while (0)
  1513  #define	arm_push_reg_float32(inst,reg)	\
  1514  			do { \
  1515  				arm_store_membase_float((inst), (reg), ARM_SP, \
  1516  									    -4, 0x00200000); \
  1517  			} while (0)
  1518  #define	arm_push_reg_float64(inst,reg)	\
  1519  			do { \
  1520  				arm_store_membase_float((inst), (reg), ARM_SP, \
  1521  									    -4, 0x00208000); \
  1522  			} while (0)
  1523  
  1524  #endif /* JIT_ARM_HAS_FPA */
  1525  
  1526  #ifdef JIT_ARM_HAS_VFP
  1527  /**
  1528   * FSTS
  1529   * Store a floating-point value to a memory address.
  1530   */
  1531  #define arm_store_membase_float32(inst,reg,basereg,imm)	\
  1532  do { \
  1533  	unsigned int reg_top_4_bits = (reg & 0x1E) >> 1;	\
  1534  	unsigned int reg_bottom_bit = (reg & 0x01);	\
  1535  	int __mb_offset = (int)(imm);	\
  1536  	if(__mb_offset >= 0 && __mb_offset < (1 << 10) && (__mb_offset & 3) == 0)	\
  1537  	{	\
  1538  		arm_inst_add((inst), arm_prefix(0x0D800A00) |	\
  1539  			(((unsigned int)(basereg)) << 16) | 	\
  1540  			(((unsigned int)(reg_top_4_bits)) << 12) |	\
  1541  			(((unsigned int)(reg_bottom_bit)) << 22) |	\
  1542  			((unsigned int)((__mb_offset / 4) & 0xFF)));	\
  1543  	}	\
  1544  	else if(__mb_offset > -(1 << 10) && __mb_offset < 0 && (__mb_offset & 3) == 0)	\
  1545  	{	\
  1546  		arm_inst_add((inst), arm_prefix(0x0D000A00) |	\
  1547  			(((unsigned int)(basereg)) << 16) |	\
  1548  			(((unsigned int)(reg_top_4_bits)) << 12) |	\
  1549  			(((unsigned int)(reg_bottom_bit)) << 22) |	\
  1550  			((unsigned int)(((-__mb_offset) / 4) & 0xFF)));	\
  1551  	}	\
  1552  	else	\
  1553  	{ \
  1554  		assert(reg != ARM_WORK);	\
  1555  		assert(basereg!=ARM_WORK);	\
  1556  		if(__mb_offset > 0)	\
  1557  		{	\
  1558  			arm_mov_reg_imm((inst), ARM_WORK, __mb_offset); \
  1559  			arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, (basereg), ARM_WORK); \
  1560  		}	\
  1561  		else	\
  1562  		{	\
  1563  			arm_mov_reg_imm((inst), ARM_WORK, -__mb_offset); \
  1564  			arm_alu_reg_reg((inst), ARM_SUB, ARM_WORK, (basereg), ARM_WORK); \
  1565  		}	\
  1566  		arm_inst_add((inst), arm_prefix(0x0D800A00) |	\
  1567  			(((unsigned int)ARM_WORK) << 16) | 	\
  1568  			(((unsigned int)(reg_top_4_bits)) << 12) |	\
  1569  			(((unsigned int)(reg_bottom_bit)) << 22));	\
  1570  	} \
  1571  } while (0)
  1572  
  1573  /**
  1574  * FSTD
  1575  */
  1576  #define	arm_store_membase_float64(inst,reg,basereg,imm)	\
  1577  do { \
  1578  	int __mb_offset = (int)(imm); \
  1579  	if(__mb_offset >= 0 && __mb_offset < (1 << 10) && \
  1580  		(__mb_offset & 3) == 0) \
  1581  	{ \
  1582  		arm_inst_add((inst), arm_prefix(0x0D800B00 |	\
  1583  			(((unsigned int)(basereg)) << 16) | \
  1584  			(((unsigned int)(reg)) << 12) | 	\
  1585  			((unsigned int)((__mb_offset / 4) & 0xFF)))); \
  1586  	} \
  1587  	else if(__mb_offset > -(1 << 10) && __mb_offset < 0 && \
  1588  		(__mb_offset & 3) == 0) \
  1589  	{ \
  1590  		arm_inst_add((inst), arm_prefix(0x0D000B00 |	\
  1591  			(((unsigned int)(basereg)) << 16) | \
  1592  			(((unsigned int)(reg)) << 12) | 	\
  1593  			((unsigned int)(((-__mb_offset) / 4) & 0xFF))));\
  1594  	} \
  1595  	else \
  1596  	{ \
  1597  		assert(reg != ARM_WORK);	\
  1598  		assert(basereg!=ARM_WORK);	\
  1599  		if(__mb_offset > 0)	\
  1600  		{	\
  1601  			arm_mov_reg_imm((inst), ARM_WORK, __mb_offset);	\
  1602  			arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, (basereg), ARM_WORK);	\
  1603  		}	\
  1604  		else	\
  1605  		{	\
  1606  			arm_mov_reg_imm((inst), ARM_WORK, -__mb_offset);	\
  1607  			arm_alu_reg_reg((inst), ARM_SUB, ARM_WORK, (basereg), ARM_WORK);\
  1608  		}	\
  1609  		arm_inst_add((inst), arm_prefix(0x0D800B00 |	\
  1610  			(((unsigned int)ARM_WORK) << 16) | \
  1611  			(((unsigned int)(reg)) << 12)));\
  1612  	} \
  1613  } while (0)
  1614  
  1615  /*
  1616   * Floating point push/pop operations
  1617   */
  1618  #define	arm_push_reg_float64(inst,reg)	\
  1619  			do { \
  1620  				arm_store_membase_float64((inst), (reg), ARM_SP, -8); \
  1621  				arm_alu_reg_imm(inst, ARM_SUB, ARM_SP, ARM_SP, 8);	\
  1622  			} while (0)
  1623  			
  1624  #define	arm_push_reg_float32(inst,reg)	\
  1625  			do { \
  1626  				arm_store_membase_float32((inst), (reg), ARM_SP, -4); \
  1627  				arm_alu_reg_imm(inst, ARM_SUB, ARM_SP, ARM_SP, 4);	\
  1628  			} while (0)
  1629  
  1630  /**
  1631   * FMDRR (Floating-point Move to Double-precision Register from two Registers)
  1632   * Move a value from two ARM registers (lowsreg, highsreg) to a double-precision floating point register (dreg)
  1633   */
  1634  #define arm_mov_double_reg_reg(inst,dreg,lowsreg,highsreg)	\
  1635  do { \
  1636  	arm_inst_add((inst), arm_prefix(0x0C400B10) | \
  1637  	(((unsigned int)(lowsreg)) << 12) | \
  1638  	(((unsigned int)(highsreg)) << 16) | \
  1639  	((unsigned int)(dreg))); \
  1640  } while(0)
  1641  
  1642  /**
  1643  * FMRRD (Floating-point Move to two registers from Double-precision Register)
  1644  * Move a value from a double-precision floating point register (sreg) to two ARM registers (lowsreg, highsreg) 
  1645  */
  1646  #define arm_mov_reg_reg_double(inst,lowsreg,highsreg,sreg)	\
  1647  do { \
  1648  	arm_inst_add((inst), arm_prefix(0x0C500B10) | \
  1649  	(((unsigned int)(lowsreg)) << 12) | \
  1650  	(((unsigned int)(highsreg)) << 16) | \
  1651  	((unsigned int)(sreg))); \
  1652  } while(0)
  1653  
  1654  /**
  1655   * FMSR (Floating-point Move to Single-precision from Register)
  1656   * Move a value from one ARM registers (sreg) to a single-precision floating point register (dreg)
  1657   */
  1658  #define arm_mov_float_reg(inst,dreg,sreg)	\
  1659  do { \
  1660  	char dreg_top_4_bits = (dreg & 0x1E) >> 1;	\
  1661  	char dreg_bottom_bit = (dreg & 0x01);	\
  1662  	arm_inst_add((inst), arm_prefix(0x0E000A10) | 	\
  1663  	(((unsigned int)(sreg)) << 12) | 	\
  1664  	(((unsigned int)(dreg_top_4_bits)) << 16) |	\
  1665  	(((unsigned int)(dreg_bottom_bit)) << 7)); \
  1666  } while(0)
  1667  
  1668  /**
  1669  * FMRS (Floating-point Move to Register from Single-precision)
  1670  * Move a value from a single-precision floating point register (sreg) to an ARM registers (dreg) 
  1671  */
  1672  #define arm_mov_reg_float(inst,dreg,sreg)	\
  1673  do { \
  1674  	char sreg_top_4_bits = (sreg & 0x1E) >> 1;	\
  1675  	char sreg_bottom_bit = (sreg & 0x01);	\
  1676  	arm_inst_add((inst), arm_prefix(0x0E100A10) | 	\
  1677  	(((unsigned int)(dreg)) << 12) | 	\
  1678  	(((unsigned int)(sreg_top_4_bits)) << 16) |	\
  1679  	(((unsigned int)(sreg_bottom_bit)) << 7)); \
  1680  } while(0)
  1681  
  1682  /**
  1683  * FCVTDS (Floating-point Convert to Double-precision from Single-precision)
  1684  * dreg is the double precision destination register
  1685  * sreg is the single precision source register
  1686  */
  1687  #define arm_convert_float_double_single(inst,dreg,sreg)	\
  1688  {	\
  1689  	unsigned char sreg_top_4_bits = (sreg & 0x1E) >> 1;	\
  1690  	unsigned char sreg_bottom_bit = (sreg & 0x01);	\
  1691  	arm_inst_add((inst), arm_prefix(0x0EB70AC0) |	\
  1692  		(((unsigned int)(sreg_top_4_bits))) |	\
  1693  		(((unsigned int)(sreg_bottom_bit)) << 5) |	\
  1694  		(((unsigned int)(dreg)) << 12));	\
  1695  }
  1696  
  1697  /**
  1698   * FCVTSD (Floating-point Convert to Single-precision from Double-precision)
  1699   * dreg is the single precision destination register
  1700   * sreg is the double precision source register
  1701   */
  1702  #define arm_convert_float_single_double(inst,dreg,sreg)	\
  1703  {	\
  1704  	unsigned char dreg_top_4_bits = (dreg & 0x1E) >> 1;	\
  1705  	unsigned char dreg_bottom_bit = (dreg & 0x01);	\
  1706  	arm_inst_add((inst), arm_prefix(0x0EB70BC0) |	\
  1707  		(((unsigned int)(dreg_top_4_bits)) << 12) |	\
  1708  		(((unsigned int)(dreg_bottom_bit)) << 22) |	\
  1709  		((unsigned int)(sreg)));	\
  1710  }
  1711  
  1712  /**
  1713   * FSITOD (Floating-point Convert Signed Integer to Double-precision)
  1714   * sreg is the single precision register containing the integer value to be converted
  1715   * dreg is the double precision destination register
  1716   */
  1717  #define arm_convert_float_signed_integer_double(inst,dreg,sreg)	\
  1718  	unsigned char sreg_top_4_bits = (sreg & 0x1E) >> 1;	\
  1719  	unsigned char sreg_bottom_bit = (sreg & 0x01);	\
  1720  	arm_inst_add((inst), arm_prefix(0x0EB80BC0) |	\
  1721  		(((unsigned int)(dreg)) << 12) |	\
  1722  		(((unsigned int)(sreg_bottom_bit)) << 5) |	\
  1723  		((unsigned int)(sreg_top_4_bits)));
  1724  
  1725  #endif /* JIT_ARM_HAS_VFP */
  1726  
  1727  /*
  1728   * Load a value from an indexed address into a register.
  1729   */
  1730  #define arm_load_memindex_either(inst,reg,basereg,indexreg,shift,mask)	\
  1731  			do { \
  1732  				arm_inst_add((inst), arm_prefix(0x07900000 | (mask)) | \
  1733  							(((unsigned int)(basereg)) << 16) | \
  1734  							(((unsigned int)(reg)) << 12) | \
  1735  							(((unsigned int)(shift)) << 7) | \
  1736  							 ((unsigned int)(indexreg))); \
  1737  			} while (0)
  1738  #define	arm_load_memindex(inst,reg,basereg,indexreg)	\
  1739  			do { \
  1740  				arm_load_memindex_either((inst), (reg), (basereg), \
  1741  										 (indexreg), 2, 0); \
  1742  			} while (0)
  1743  #define	arm_load_memindex_byte(inst,reg,basereg,indexreg)	\
  1744  			do { \
  1745  				arm_load_memindex_either((inst), (reg), (basereg), \
  1746  									     (indexreg), 0, 0x00400000); \
  1747  			} while (0)
  1748  #define	arm_load_memindex_sbyte(inst,reg,basereg,indexreg)	\
  1749  			do { \
  1750  				arm_load_memindex_either((inst), (reg), (basereg), \
  1751  									     (indexreg), 0, 0x00400000); \
  1752  				arm_shift_reg_imm8((inst), ARM_SHL, (reg), (reg), 24); \
  1753  				arm_shift_reg_imm8((inst), ARM_SAR, (reg), (reg), 24); \
  1754  			} while (0)
  1755  #define	arm_load_memindex_ushort(inst,reg,basereg,indexreg)	\
  1756  			do { \
  1757  				arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, (basereg), \
  1758  								(indexreg)); \
  1759  				arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, ARM_WORK, \
  1760  								(indexreg)); \
  1761  				arm_load_membase_byte((inst), (reg), ARM_WORK, 0); \
  1762  				arm_load_membase_byte((inst), ARM_WORK, ARM_WORK, 1); \
  1763  				arm_shift_reg_imm8((inst), ARM_SHL, ARM_WORK, ARM_WORK, 8); \
  1764  				arm_alu_reg_reg((inst), ARM_ORR, (reg), (reg), ARM_WORK); \
  1765  			} while (0)
  1766  #define	arm_load_memindex_short(inst,reg,basereg,indexreg)	\
  1767  			do { \
  1768  				arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, (basereg), \
  1769  								(indexreg)); \
  1770  				arm_alu_reg_reg((inst), ARM_ADD, ARM_WORK, ARM_WORK, \
  1771  								(indexreg)); \
  1772  				arm_load_membase_byte((inst), (reg), ARM_WORK, 0); \
  1773  				arm_load_membase_byte((inst), ARM_WORK, ARM_WORK, 1); \
  1774  				arm_shift_reg_imm8((inst), ARM_SHL, ARM_WORK, ARM_WORK, 24); \
  1775  				arm_shift_reg_imm8((inst), ARM_SAR, ARM_WORK, ARM_WORK, 16); \
  1776  				arm_alu_reg_reg((inst), ARM_ORR, (reg), (reg), ARM_WORK); \
  1777  			} while (0)
  1778  
  1779  /*
  1780   * Store a value from a register into an indexed address.
  1781   *
  1782   * Note: storing a 16-bit value destroys the values in the base
  1783   * register and the source register.
  1784   */
  1785  #define arm_store_memindex_either(inst,reg,basereg,indexreg,shift,mask)	\
  1786  			do { \
  1787  				arm_inst_add((inst), arm_prefix(0x07800000 | (mask)) | \
  1788  							(((unsigned int)(basereg)) << 16) | \
  1789  							(((unsigned int)(reg)) << 12) | \
  1790  							(((unsigned int)(shift)) << 7) | \
  1791  							 ((unsigned int)(indexreg))); \
  1792  			} while (0)
  1793  #define	arm_store_memindex(inst,reg,basereg,indexreg)	\
  1794  			do { \
  1795  				arm_store_memindex_either((inst), (reg), (basereg), \
  1796  										  (indexreg), 2, 0); \
  1797  			} while (0)
  1798  #define	arm_store_memindex_byte(inst,reg,basereg,indexreg)	\
  1799  			do { \
  1800  				arm_store_memindex_either((inst), (reg), (basereg), \
  1801  										  (indexreg), 0, 0x00400000); \
  1802  			} while (0)
  1803  #define	arm_store_memindex_sbyte(inst,reg,basereg,indexreg)	\
  1804  			do { \
  1805  				arm_store_memindex_byte((inst), (reg), (basereg), \
  1806  										(indexreg)); \
  1807  			} while (0)
  1808  #define	arm_store_memindex_short(inst,reg,basereg,indexreg)	\
  1809  			do { \
  1810  				arm_store_memindex_either((inst), (reg), (basereg), \
  1811  										  (indexreg), 1, 0x00400000); \
  1812  				arm_alu_reg_imm8((inst), ARM_ADD, (basereg), (basereg), 1); \
  1813  				arm_shift_reg_imm8((inst), ARM_SHR, (reg), (reg), 8); \
  1814  				arm_store_memindex_either((inst), (reg), (basereg), \
  1815  										  (indexreg), 1, 0x00400000); \
  1816  			} while (0)
  1817  #define	arm_store_memindex_ushort(inst,reg,basereg,indexreg)	\
  1818  			do { \
  1819  				arm_store_memindex_short((inst), (reg), \
  1820  										 (basereg), (indexreg)); \
  1821  			} while (0)
  1822  
  1823  #ifdef __cplusplus
  1824  };
  1825  #endif
  1826  
#endif /* _JIT_GEN_ARM_H */