github.com/emmansun/gmsm@v0.29.1/sm9/bn256/select_arm64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  
     5  #define res_ptr R0 // destination pointer; every routine loads it from res+0(FP)
     6  #define a_ptr R1 // first source pointer; every routine loads it from a+8(FP)
     7  #define b_ptr R2 // second source pointer; the MovCond routines load it from b+16(FP)
     8  
     9  /* ---------------------------------------*/
    10  // func gfpCopy(res, a *gfP) -- copies 32 bytes from a to res
    11  TEXT ·gfpCopy(SB), NOSPLIT, $0
    12  	MOVD	a+8(FP), a_ptr	// source
    13  	MOVD	res+0(FP), res_ptr	// destination
    14  
    15  	VLD1	(a_ptr), [V4.B16, V5.B16]	// two 16-byte vectors = 32 bytes
    16  	VST1	[V4.B16, V5.B16], (res_ptr)
    17  
    18  	RET
    19  
    20  /* ---------------------------------------*/
    21  // func gfp2Copy(res, a *gfP2) -- copies 64 bytes from a to res
    22  TEXT ·gfp2Copy(SB), NOSPLIT, $0
    23  	MOVD	a+8(FP), a_ptr	// source
    24  	MOVD	res+0(FP), res_ptr	// destination
    25  
    26  	VLD1	(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// four 16-byte vectors = 64 bytes
    27  	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (res_ptr)
    28  
    29  	RET
    30  
    31  /* ---------------------------------------*/
    32  // func gfp4Copy(res, a *gfP4) -- copies 128 bytes from a to res, 64 bytes at a time
    33  TEXT ·gfp4Copy(SB), NOSPLIT, $0
    34  	MOVD	a+8(FP), a_ptr	// source
    35  	MOVD	res+0(FP), res_ptr	// destination
    36  
    37  	VLD1.P	64(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 0..63; both pointers advance by 64
    38  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(res_ptr)
    39  
    40  	VLD1	(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 64..127; last block, pointers are dead afterwards
    41  	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (res_ptr)
    42  
    43  	RET
    44  
    45  /* ---------------------------------------*/
    46  // func gfp6Copy(res, a *gfP6) -- copies 192 bytes from a to res, 64 bytes at a time
    47  TEXT ·gfp6Copy(SB), NOSPLIT, $0
    48  	MOVD	a+8(FP), a_ptr	// source
    49  	MOVD	res+0(FP), res_ptr	// destination
    50  
    51  	VLD1.P	64(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 0..63; both pointers advance by 64
    52  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(res_ptr)
    53  
    54  	VLD1.P	64(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 64..127
    55  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(res_ptr)
    56  
    57  	VLD1	(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 128..191; last block, pointers are dead afterwards
    58  	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (res_ptr)
    59  
    60  	RET
    61  
    62  /* ---------------------------------------*/
    63  // func gfp12Copy(res, a *gfP12) -- copies 384 bytes from a to res, 64 bytes at a time
    64  TEXT ·gfp12Copy(SB), NOSPLIT, $0
    65  	MOVD	a+8(FP), a_ptr	// source
    66  	MOVD	res+0(FP), res_ptr	// destination
    67  
    68  	VLD1.P	64(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 0..63; both pointers advance by 64
    69  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(res_ptr)
    70  
    71  	VLD1.P	64(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 64..127
    72  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(res_ptr)
    73  
    74  	VLD1.P	64(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 128..191
    75  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(res_ptr)
    76  
    77  	VLD1.P	64(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 192..255
    78  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(res_ptr)
    79  
    80  	VLD1.P	64(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 256..319
    81  	VST1.P	[V4.B16, V5.B16, V6.B16, V7.B16], 64(res_ptr)
    82  
    83  	VLD1	(a_ptr), [V4.B16, V5.B16, V6.B16, V7.B16]	// bytes 320..383; last block, pointers are dead afterwards
    84  	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (res_ptr)
    85  
    86  	RET
    87  
    88  /* ---------------------------------------*/
    89  // func gfP12MovCond(res, a, b *gfP12, cond int)
    90  // If cond == 0 res=b, else res=a. All 64 bits of cond are tested; 384 bytes are selected without branching.
    91  TEXT ·gfP12MovCond(SB),NOSPLIT,$0
    92  	MOVD	res+0(FP), res_ptr
    93  	MOVD	a+8(FP), a_ptr
    94  	MOVD	b+16(FP), b_ptr
    95  	MOVD	cond+24(FP), R3
    96  
    97  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
    98  	VMOV	R3, V1.D2	// broadcast the full 64-bit cond into both lanes
    99  	VCMEQ	V0.D2, V1.D2, V2.D2	// V2 = all ones if cond == 0, all zeros otherwise
   100  
   101  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 0..63 of a
   102  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]	// bytes 0..63 of b
   103  	VBIT	V2.B16, V7.B16, V3.B16	// where the mask is set, replace the a bits with the b bits
   104  	VBIT	V2.B16, V8.B16, V4.B16
   105  	VBIT	V2.B16, V9.B16, V5.B16
   106  	VBIT	V2.B16, V10.B16, V6.B16
   107  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   108  
   109  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 64..127
   110  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   111  	VBIT	V2.B16, V7.B16, V3.B16
   112  	VBIT	V2.B16, V8.B16, V4.B16
   113  	VBIT	V2.B16, V9.B16, V5.B16
   114  	VBIT	V2.B16, V10.B16, V6.B16
   115  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   116  
   117  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 128..191
   118  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   119  	VBIT	V2.B16, V7.B16, V3.B16
   120  	VBIT	V2.B16, V8.B16, V4.B16
   121  	VBIT	V2.B16, V9.B16, V5.B16
   122  	VBIT	V2.B16, V10.B16, V6.B16
   123  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   124  
   125  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 192..255
   126  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   127  	VBIT	V2.B16, V7.B16, V3.B16
   128  	VBIT	V2.B16, V8.B16, V4.B16
   129  	VBIT	V2.B16, V9.B16, V5.B16
   130  	VBIT	V2.B16, V10.B16, V6.B16
   131  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   132  
   133  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 256..319
   134  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   135  	VBIT	V2.B16, V7.B16, V3.B16
   136  	VBIT	V2.B16, V8.B16, V4.B16
   137  	VBIT	V2.B16, V9.B16, V5.B16
   138  	VBIT	V2.B16, V10.B16, V6.B16
   139  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   140  
   141  	VLD1	(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 320..383; last block, no pointer update
   142  	VLD1	(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   143  	VBIT	V2.B16, V7.B16, V3.B16
   144  	VBIT	V2.B16, V8.B16, V4.B16
   145  	VBIT	V2.B16, V9.B16, V5.B16
   146  	VBIT	V2.B16, V10.B16, V6.B16
   147  	VST1	[V3.B16, V4.B16, V5.B16, V6.B16], (res_ptr)
   148  
   149  	RET
   150  
   151  /* ---------------------------------------*/
   152  // func curvePointMovCond(res, a, b *curvePoint, cond int)
   153  // If cond == 0 res=b, else res=a. All 64 bits of cond are tested; 128 bytes are selected without branching.
   154  TEXT ·curvePointMovCond(SB),NOSPLIT,$0
   155  	MOVD	res+0(FP), res_ptr
   156  	MOVD	a+8(FP), a_ptr
   157  	MOVD	b+16(FP), b_ptr
   158  	MOVD	cond+24(FP), R3
   159  
   160  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
   161  	VMOV	R3, V1.D2	// broadcast the full 64-bit cond into both lanes
   162  	VCMEQ	V0.D2, V1.D2, V2.D2	// V2 = all ones if cond == 0, all zeros otherwise
   163  
   164  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 0..63 of a
   165  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]	// bytes 0..63 of b
   166  	VBIT	V2.B16, V7.B16, V3.B16	// where the mask is set, replace the a bits with the b bits
   167  	VBIT	V2.B16, V8.B16, V4.B16
   168  	VBIT	V2.B16, V9.B16, V5.B16
   169  	VBIT	V2.B16, V10.B16, V6.B16
   170  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   171  
   172  	VLD1	(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 64..127; last block, no pointer update
   173  	VLD1	(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   174  	VBIT	V2.B16, V7.B16, V3.B16
   175  	VBIT	V2.B16, V8.B16, V4.B16
   176  	VBIT	V2.B16, V9.B16, V5.B16
   177  	VBIT	V2.B16, V10.B16, V6.B16
   178  	VST1	[V3.B16, V4.B16, V5.B16, V6.B16], (res_ptr)
   179  
   180  	RET
   181  
   182  /* ---------------------------------------*/
   183  // func twistPointMovCond(res, a, b *twistPoint, cond int)
   184  // If cond == 0 res=b, else res=a. All 64 bits of cond are tested; 256 bytes are selected without branching.
   185  TEXT ·twistPointMovCond(SB),NOSPLIT,$0
   186  	MOVD	res+0(FP), res_ptr
   187  	MOVD	a+8(FP), a_ptr
   188  	MOVD	b+16(FP), b_ptr
   189  	MOVD	cond+24(FP), R3
   190  
   191  	VEOR	V0.B16, V0.B16, V0.B16	// V0 = 0
   192  	VMOV	R3, V1.D2	// broadcast the full 64-bit cond into both lanes
   193  	VCMEQ	V0.D2, V1.D2, V2.D2	// V2 = all ones if cond == 0, all zeros otherwise
   194  
   195  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 0..63 of a
   196  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]	// bytes 0..63 of b
   197  	VBIT	V2.B16, V7.B16, V3.B16	// where the mask is set, replace the a bits with the b bits
   198  	VBIT	V2.B16, V8.B16, V4.B16
   199  	VBIT	V2.B16, V9.B16, V5.B16
   200  	VBIT	V2.B16, V10.B16, V6.B16
   201  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   202  
   203  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 64..127
   204  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   205  	VBIT	V2.B16, V7.B16, V3.B16
   206  	VBIT	V2.B16, V8.B16, V4.B16
   207  	VBIT	V2.B16, V9.B16, V5.B16
   208  	VBIT	V2.B16, V10.B16, V6.B16
   209  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   210  
   211  	VLD1.P	64(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 128..191
   212  	VLD1.P	64(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   213  	VBIT	V2.B16, V7.B16, V3.B16
   214  	VBIT	V2.B16, V8.B16, V4.B16
   215  	VBIT	V2.B16, V9.B16, V5.B16
   216  	VBIT	V2.B16, V10.B16, V6.B16
   217  	VST1.P	[V3.B16, V4.B16, V5.B16, V6.B16], 64(res_ptr)
   218  
   219  	VLD1	(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]	// bytes 192..255; last block, no pointer update
   220  	VLD1	(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
   221  	VBIT	V2.B16, V7.B16, V3.B16
   222  	VBIT	V2.B16, V8.B16, V4.B16
   223  	VBIT	V2.B16, V9.B16, V5.B16
   224  	VBIT	V2.B16, V10.B16, V6.B16
   225  	VST1	[V3.B16, V4.B16, V5.B16, V6.B16], (res_ptr)
   226  
   227  	RET