// Source: github.com/arjunbeliever/ignite@v0.0.0-20220406110515-46bbbbec2587/crypto/bn256/cloudflare/mul_amd64.h

// mul(a0,a1,a2,a3, rb, stack) multiplies two four-word (4 x 64-bit) values,
// least significant word first, and writes the eight-word product to memory.
//
//   a0..a3  the four words of the first factor. Each one is copied into AX
//           once per partial product, so it must be an operand that stays
//           valid while AX, DX and R8-R12 are being overwritten.
//   rb      memory operand of the second factor; its words are read at
//           0+rb, 8+rb, 16+rb and 24+rb.
//   stack   memory operand of the output; the product is written to
//           0+stack .. 56+stack. rb is re-read after stack has been written,
//           so the two regions must not overlap.
//
// The product is built one row at a time: row i is the five-word value
// a_i * rb, which is added to the running total at byte offset 8*i and
// stored back before the next row starts.
//
// Clobbers AX, DX, R8, R9, R10, R11, R12 and the flags.
// NOTE(review): storeBlock is defined elsewhere; from its use here it is
// assumed to store its four register arguments to four consecutive 8-byte
// slots starting at the given address.
#define mul(a0,a1,a2,a3, rb, stack) \
	\ // Row 0: a0 * rb -> R8:R9:R10:R11:R12
	MOVQ a0, AX \
	MULQ 0+rb \
	MOVQ AX, R8 \
	MOVQ DX, R9 \
	MOVQ a0, AX \
	MULQ 8+rb \
	ADDQ AX, R9 \
	ADCQ $0, DX \
	MOVQ DX, R10 \
	MOVQ a0, AX \
	MULQ 16+rb \
	ADDQ AX, R10 \
	ADCQ $0, DX \
	MOVQ DX, R11 \
	MOVQ a0, AX \
	MULQ 24+rb \
	ADDQ AX, R11 \
	ADCQ $0, DX \
	MOVQ DX, R12 \
	\
	\ // Nothing to accumulate yet: row 0 becomes words 0..4 of the result
	storeBlock(R8,R9,R10,R11, 0+stack) \
	MOVQ R12, 32+stack \
	\
	\ // Row 1: a1 * rb -> R8:R9:R10:R11:R12
	MOVQ a1, AX \
	MULQ 0+rb \
	MOVQ AX, R8 \
	MOVQ DX, R9 \
	MOVQ a1, AX \
	MULQ 8+rb \
	ADDQ AX, R9 \
	ADCQ $0, DX \
	MOVQ DX, R10 \
	MOVQ a1, AX \
	MULQ 16+rb \
	ADDQ AX, R10 \
	ADCQ $0, DX \
	MOVQ DX, R11 \
	MOVQ a1, AX \
	MULQ 24+rb \
	ADDQ AX, R11 \
	ADCQ $0, DX \
	MOVQ DX, R12 \
	\
	\ // Add words 1..4 of the running total; the carry-out lands in R12
	ADDQ 8+stack, R8 \
	ADCQ 16+stack, R9 \
	ADCQ 24+stack, R10 \
	ADCQ 32+stack, R11 \
	ADCQ $0, R12 \
	storeBlock(R8,R9,R10,R11, 8+stack) \
	MOVQ R12, 40+stack \
	\
	\ // Row 2: a2 * rb -> R8:R9:R10:R11:R12
	MOVQ a2, AX \
	MULQ 0+rb \
	MOVQ AX, R8 \
	MOVQ DX, R9 \
	MOVQ a2, AX \
	MULQ 8+rb \
	ADDQ AX, R9 \
	ADCQ $0, DX \
	MOVQ DX, R10 \
	MOVQ a2, AX \
	MULQ 16+rb \
	ADDQ AX, R10 \
	ADCQ $0, DX \
	MOVQ DX, R11 \
	MOVQ a2, AX \
	MULQ 24+rb \
	ADDQ AX, R11 \
	ADCQ $0, DX \
	MOVQ DX, R12 \
	\
	\ // Add words 2..5 of the running total; the carry-out lands in R12
	ADDQ 16+stack, R8 \
	ADCQ 24+stack, R9 \
	ADCQ 32+stack, R10 \
	ADCQ 40+stack, R11 \
	ADCQ $0, R12 \
	storeBlock(R8,R9,R10,R11, 16+stack) \
	MOVQ R12, 48+stack \
	\
	\ // Row 3: a3 * rb -> R8:R9:R10:R11:R12
	MOVQ a3, AX \
	MULQ 0+rb \
	MOVQ AX, R8 \
	MOVQ DX, R9 \
	MOVQ a3, AX \
	MULQ 8+rb \
	ADDQ AX, R9 \
	ADCQ $0, DX \
	MOVQ DX, R10 \
	MOVQ a3, AX \
	MULQ 16+rb \
	ADDQ AX, R10 \
	ADCQ $0, DX \
	MOVQ DX, R11 \
	MOVQ a3, AX \
	MULQ 24+rb \
	ADDQ AX, R11 \
	ADCQ $0, DX \
	MOVQ DX, R12 \
	\
	\ // Add words 3..6 of the running total; R12 becomes the top word (7)
	ADDQ 24+stack, R8 \
	ADCQ 32+stack, R9 \
	ADCQ 40+stack, R10 \
	ADCQ 48+stack, R11 \
	ADCQ $0, R12 \
	storeBlock(R8,R9,R10,R11, 24+stack) \
	MOVQ R12, 56+stack

// gfpReduce(stack) reduces the eight-word (512-bit) value T held at
// 0+stack .. 56+stack, using the four-word constants ·np (N' in the comments
// below) and ·p2 (N in the comments below).
//
// Steps:
//   1. m = (T * N') mod R. Only the low four words of T are read and only
//      the low four words of the product are kept, so R here is 2^256.
//      Partial products that would land at word 4 or above are never
//      computed, which is why each successive ·np row is one multiply shorter.
//   2. m is stored at 64+stack .. 88+stack and m * N is written to
//      96+stack .. 152+stack by mul.
//   3. T + m*N is formed in R8..R15 with the carry-out in AX, and the upper
//      half plus carry is handed to gfpCarry.
//
// Memory used: 160 bytes starting at stack (T: 64, m: 32, m*N: 64).
// Clobbers AX, DX, R8-R15 and the flags; BX is passed to gfpCarry.
// NOTE(review): the steps have the shape of a Montgomery reduction. loadBlock
// and gfpCarry are defined elsewhere; gfpCarry presumably performs the final
// conditional correction on R12:R13:R14:R15 with carry AX - confirm against
// its definition.
#define gfpReduce(stack) \
	\ // m = (T * N') mod R, store m in R8:R9:R10:R11
	\ // np word 0 times T words 0..3 (high half of the last product dropped)
	MOVQ ·np+0(SB), AX \
	MULQ 0+stack \
	MOVQ AX, R8 \
	MOVQ DX, R9 \
	MOVQ ·np+0(SB), AX \
	MULQ 8+stack \
	ADDQ AX, R9 \
	ADCQ $0, DX \
	MOVQ DX, R10 \
	MOVQ ·np+0(SB), AX \
	MULQ 16+stack \
	ADDQ AX, R10 \
	ADCQ $0, DX \
	MOVQ DX, R11 \
	MOVQ ·np+0(SB), AX \
	MULQ 24+stack \
	ADDQ AX, R11 \
	\
	\ // np word 1 times T words 0..2, collected in R12:R13:R14
	MOVQ ·np+8(SB), AX \
	MULQ 0+stack \
	MOVQ AX, R12 \
	MOVQ DX, R13 \
	MOVQ ·np+8(SB), AX \
	MULQ 8+stack \
	ADDQ AX, R13 \
	ADCQ $0, DX \
	MOVQ DX, R14 \
	MOVQ ·np+8(SB), AX \
	MULQ 16+stack \
	ADDQ AX, R14 \
	\
	\ // Fold that row into m at word offset 1
	ADDQ R12, R9 \
	ADCQ R13, R10 \
	ADCQ R14, R11 \
	\
	\ // np word 2 times T words 0..1, collected in R12:R13
	MOVQ ·np+16(SB), AX \
	MULQ 0+stack \
	MOVQ AX, R12 \
	MOVQ DX, R13 \
	MOVQ ·np+16(SB), AX \
	MULQ 8+stack \
	ADDQ AX, R13 \
	\
	\ // Fold that row into m at word offset 2
	ADDQ R12, R10 \
	ADCQ R13, R11 \
	\
	\ // np word 3 times T word 0 only affects the top word of m
	MOVQ ·np+24(SB), AX \
	MULQ 0+stack \
	ADDQ AX, R11 \
	\
	\ // Spill m so that mul can read it as its memory operand
	storeBlock(R8,R9,R10,R11, 64+stack) \
	\
	\ // m * N
	mul(·p2+0(SB),·p2+8(SB),·p2+16(SB),·p2+24(SB), 64+stack, 96+stack) \
	\
	\ // Add the 512-bit intermediate to m*N
	loadBlock(96+stack, R8,R9,R10,R11) \
	loadBlock(128+stack, R12,R13,R14,R15) \
	\
	\ // AX is zeroed before the add chain so the last ADCQ captures the carry-out
	MOVQ $0, AX \
	ADDQ 0+stack, R8 \
	ADCQ 8+stack, R9 \
	ADCQ 16+stack, R10 \
	ADCQ 24+stack, R11 \
	ADCQ 32+stack, R12 \
	ADCQ 40+stack, R13 \
	ADCQ 48+stack, R14 \
	ADCQ 56+stack, R15 \
	ADCQ $0, AX \
	\
	gfpCarry(R12,R13,R14,R15,AX, R8,R9,R10,R11,BX)