// Source: github.com/core-coin/go-core/v2@v2.1.9/crypto/bn256/cloudflare/mul_arm64.h

// mul(c0,...,c7) multiplies two 4-limb (4 x 64-bit) integers and writes the
// 8-limb product to c0..c7, with c0 the least significant limb.
//
// Inputs:
//   R1:R2:R3:R4  first operand,  R1 is the least significant limb
//   R5:R6:R7:R8  second operand, R5 is the least significant limb
//
// Operand order used throughout: "MUL a, b, d" / "UMULH a, b, d" write the
// low / high 64 bits of a*b to d; "ADDS a, d" and "ADCS a, d" accumulate
// into d and update the carry flag.
//
// Structure: one row per limb of the first operand. Row 0 is written straight
// into c0..c4. Rows 1..3 are first built in R1:R26:R27:R29 plus a fresh top
// limb (c5, c6, c7 respectively) and then added into the running result one
// limb position higher than the previous row.
//
// Clobbers: R0 (scratch for low halves), R1 (reused as the row's low limb
// from row 1 onward, so the first operand's low limb is destroyed),
// R26, R27, R29 and the condition flags. R2..R8 are only read.
// The c0..c7 arguments must not alias any register the macro still needs to
// read or uses as scratch.
#define mul(c0,c1,c2,c3,c4,c5,c6,c7) \
	\ // Row 0: R1 * (R5..R8) -> c0..c4
	MUL R1, R5, c0 \
	UMULH R1, R5, c1 \
	MUL R1, R6, R0 \
	ADDS R0, c1 \
	UMULH R1, R6, c2 \
	MUL R1, R7, R0 \
	ADCS R0, c2 \
	UMULH R1, R7, c3 \
	MUL R1, R8, R0 \
	ADCS R0, c3 \
	UMULH R1, R8, c4 \
	ADCS ZR, c4 \
	\
	\ // Row 1: R2 * (R5..R8) -> R1:R26:R27:R29:c5, then added into c1..c5
	MUL R2, R5, R1 \
	UMULH R2, R5, R26 \
	MUL R2, R6, R0 \
	ADDS R0, R26 \
	UMULH R2, R6, R27 \
	MUL R2, R7, R0 \
	ADCS R0, R27 \
	UMULH R2, R7, R29 \
	MUL R2, R8, R0 \
	ADCS R0, R29 \
	UMULH R2, R8, c5 \
	ADCS ZR, c5 \
	ADDS R1, c1 \
	ADCS R26, c2 \
	ADCS R27, c3 \
	ADCS R29, c4 \
	ADCS ZR, c5 \
	\
	\ // Row 2: R3 * (R5..R8) -> R1:R26:R27:R29:c6, then added into c2..c6
	MUL R3, R5, R1 \
	UMULH R3, R5, R26 \
	MUL R3, R6, R0 \
	ADDS R0, R26 \
	UMULH R3, R6, R27 \
	MUL R3, R7, R0 \
	ADCS R0, R27 \
	UMULH R3, R7, R29 \
	MUL R3, R8, R0 \
	ADCS R0, R29 \
	UMULH R3, R8, c6 \
	ADCS ZR, c6 \
	ADDS R1, c2 \
	ADCS R26, c3 \
	ADCS R27, c4 \
	ADCS R29, c5 \
	ADCS ZR, c6 \
	\
	\ // Row 3: R4 * (R5..R8) -> R1:R26:R27:R29:c7, then added into c3..c7
	MUL R4, R5, R1 \
	UMULH R4, R5, R26 \
	MUL R4, R6, R0 \
	ADDS R0, R26 \
	UMULH R4, R6, R27 \
	MUL R4, R7, R0 \
	ADCS R0, R27 \
	UMULH R4, R7, R29 \
	MUL R4, R8, R0 \
	ADCS R0, R29 \
	UMULH R4, R8, c7 \
	ADCS ZR, c7 \
	ADDS R1, c3 \
	ADCS R26, c4 \
	ADCS R27, c5 \
	ADCS R29, c6 \
	ADCS ZR, c7

// gfpReduce() reduces the 8-limb value T held in R9..R16 (R9 least
// significant) to a 4-limb result in R1:R2:R3:R4.
//
// Steps, as performed by the instructions below:
//   1. Load the four limbs of the ·np symbol and compute
//      m = low 4 limbs of (R9:R10:R11:R12 * np) into R1:R2:R3:R4.
//      Only partial products that land in the low four limbs are formed.
//   2. loadModulus(R5,R6,R7,R8), then mul(...) to form the 8-limb product
//      of m and R5..R8 in R17:R25:R19:R20:R21:R22:R23:R24.
//      NOTE(review): loadModulus is defined outside this file; presumably it
//      loads the field modulus into R5..R8 - confirm.
//   3. Add T to that product limb by limb; the upper half R21..R24 is the
//      candidate result.
//   4. Subtract R5..R8 from the candidate into R10..R13 and use CSEL on the
//      carry flag to pick the subtracted value when no borrow occurred,
//      otherwise the candidate.
//
// Clobbers: R0..R8, R10..R13, R17, R19..R25, the registers clobbered by mul
// (R26, R27, R29) and the condition flags. R9 and R14..R16 are only read.
#define gfpReduce() \
	\ // m = (T * N') mod R, store m in R1:R2:R3:R4
	MOVD ·np+0(SB), R17 \
	MOVD ·np+8(SB), R25 \
	MOVD ·np+16(SB), R19 \
	MOVD ·np+24(SB), R20 \
	\
	\ // R9 * np, truncated to four limbs; the carry out of the top limb is dropped
	MUL R9, R17, R1 \
	UMULH R9, R17, R2 \
	MUL R9, R25, R0 \
	ADDS R0, R2 \
	UMULH R9, R25, R3 \
	MUL R9, R19, R0 \
	ADCS R0, R3 \
	UMULH R9, R19, R4 \
	MUL R9, R20, R0 \
	ADCS R0, R4 \
	\
	\ // R10 * np contributes to limbs 1..3 only
	MUL R10, R17, R21 \
	UMULH R10, R17, R22 \
	MUL R10, R25, R0 \
	ADDS R0, R22 \
	UMULH R10, R25, R23 \
	MUL R10, R19, R0 \
	ADCS R0, R23 \
	ADDS R21, R2 \
	ADCS R22, R3 \
	ADCS R23, R4 \
	\
	\ // R11 * np contributes to limbs 2..3 only
	MUL R11, R17, R21 \
	UMULH R11, R17, R22 \
	MUL R11, R25, R0 \
	ADDS R0, R22 \
	ADDS R21, R3 \
	ADCS R22, R4 \
	\
	\ // R12 * np contributes to limb 3 only
	MUL R12, R17, R21 \
	ADDS R21, R4 \
	\
	\ // m * N
	loadModulus(R5,R6,R7,R8) \
	mul(R17,R25,R19,R20,R21,R22,R23,R24) \
	\
	\ // Add the 512-bit intermediate to m*N
	\ // NOTE(review): the final carry is captured in R0 but R0 is not read
	\ // again in this macro; presumably the sum cannot overflow 512 bits for
	\ // the inputs callers supply - confirm.
	MOVD ZR, R0 \
	ADDS R9, R17 \
	ADCS R10, R25 \
	ADCS R11, R19 \
	ADCS R12, R20 \
	ADCS R13, R21 \
	ADCS R14, R22 \
	ADCS R15, R23 \
	ADCS R16, R24 \
	ADCS ZR, R0 \
	\
	\ // Our output is R21:R22:R23:R24. Reduce mod p if necessary.
	SUBS R5, R21, R10 \
	SBCS R6, R22, R11 \
	SBCS R7, R23, R12 \
	SBCS R8, R24, R13 \
	\
	\ // Carry set means the subtraction did not borrow: keep the difference.
	CSEL CS, R10, R21, R1 \
	CSEL CS, R11, R22, R2 \
	CSEL CS, R12, R23, R3 \
	CSEL CS, R13, R24, R4