github.com/emmansun/gmsm@v0.29.1/sm9/bn256/select_arm64.s

//go:build !purego

#include "textflag.h"

#define res_ptr R0
#define a_ptr R1
#define b_ptr R2

/* ---------------------------------------*/
// func gfpCopy(res, a *gfP)
TEXT ·gfpCopy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD a+8(FP), a_ptr

	VLD1 (a_ptr), [V0.B16, V1.B16]
	VST1 [V0.B16, V1.B16], (res_ptr)

	RET

/* ---------------------------------------*/
// func gfp2Copy(res, a *gfP2)
TEXT ·gfp2Copy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD a+8(FP), a_ptr

	VLD1 (a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1 [V0.B16, V1.B16, V2.B16, V3.B16], (res_ptr)

	RET

/* ---------------------------------------*/
// func gfp4Copy(res, a *gfP4)
TEXT ·gfp4Copy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD a+8(FP), a_ptr

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	RET

/* ---------------------------------------*/
// func gfp6Copy(res, a *gfP6)
TEXT ·gfp6Copy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD a+8(FP), a_ptr

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	RET

/* ---------------------------------------*/
// func gfp12Copy(res, a *gfP12)
TEXT ·gfp12Copy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD a+8(FP), a_ptr

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	RET
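/* ---------------------------------------*/
// Constant-time selection, used by the three MovCond routines below:
// VMOV broadcasts cond into every 32-bit lane of V1, and VCMEQ compares
// those lanes against the zeroed V0, so V2 becomes an all-ones mask when
// cond == 0 and stays all zeros otherwise. Each 64-byte chunk of a is then
// loaded into V3-V6 and the matching chunk of b into V7-V10; VBIT copies
// the bits of b over a wherever the mask is set. The stores therefore write
// b when cond == 0 and a otherwise, with no data-dependent branches.
// gfP12 spans 384 bytes, hence the six 64-byte chunks in gfP12MovCond.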
/* ---------------------------------------*/
// func gfP12MovCond(res, a, b *gfP12, cond int)
// If cond == 0 res=b, else res=a
TEXT ·gfP12MovCond(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD a+8(FP), a_ptr
	MOVD b+16(FP), b_ptr
	MOVD cond+24(FP), R3

	VEOR V0.B16, V0.B16, V0.B16
	VMOV R3, V1.S4
	VCMEQ V0.S4, V1.S4, V2.S4

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1 (a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1 (b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1 [V3.B16, V4.B16, V5.B16, V6.B16], (res_ptr)

	RET
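/* ---------------------------------------*/
// The mask-and-VBIT pattern above is repeated for the two point types:
// curvePointMovCond selects 128 bytes (presumably four gfP coordinates) and
// twistPointMovCond selects 256 bytes (presumably four gfP2 coordinates);
// only the number of 64-byte chunks differs.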
/* ---------------------------------------*/
// func curvePointMovCond(res, a, b *curvePoint, cond int)
// If cond == 0 res=b, else res=a
TEXT ·curvePointMovCond(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD a+8(FP), a_ptr
	MOVD b+16(FP), b_ptr
	MOVD cond+24(FP), R3

	VEOR V0.B16, V0.B16, V0.B16
	VMOV R3, V1.S4
	VCMEQ V0.S4, V1.S4, V2.S4

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1 (a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1 (b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1 [V3.B16, V4.B16, V5.B16, V6.B16], (res_ptr)

	RET

/* ---------------------------------------*/
// func twistPointMovCond(res, a, b *twistPoint, cond int)
// If cond == 0 res=b, else res=a
TEXT ·twistPointMovCond(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	MOVD a+8(FP), a_ptr
	MOVD b+16(FP), b_ptr
	MOVD cond+24(FP), R3

	VEOR V0.B16, V0.B16, V0.B16
	VMOV R3, V1.S4
	VCMEQ V0.S4, V1.S4, V2.S4

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1.P (64)(a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1.P (64)(b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1.P [V3.B16, V4.B16, V5.B16, V6.B16], (64)(res_ptr)

	VLD1 (a_ptr), [V3.B16, V4.B16, V5.B16, V6.B16]
	VLD1 (b_ptr), [V7.B16, V8.B16, V9.B16, V10.B16]
	VBIT V2.B16, V7.B16, V3.B16
	VBIT V2.B16, V8.B16, V4.B16
	VBIT V2.B16, V9.B16, V5.B16
	VBIT V2.B16, V10.B16, V6.B16
	VST1 [V3.B16, V4.B16, V5.B16, V6.B16], (res_ptr)

	RET
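/* ---------------------------------------*/
// For reference, the companion Go file is expected to declare stubs matching
// the signatures commented above; a sketch is given below. The signatures are
// taken from this file, but the //go:noescape directives (and the idea that
// they all sit together in one declarations file) are assumptions, not copied
// from the package source.
//
//	//go:noescape
//	func gfpCopy(res, a *gfP)
//
//	//go:noescape
//	func gfp12Copy(res, a *gfP12)
//
//	//go:noescape
//	func gfP12MovCond(res, a, b *gfP12, cond int)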