github.com/bloxroute-labs/bor@v0.1.4/crypto/bn256/cloudflare/mul_arm64.h (about)

     // mul(c0..c7): 256 x 256 -> 512-bit unsigned schoolbook multiplication
     // (Go assembler syntax, arm64 instructions).
     //
     // Inputs:
     //   R1:R2:R3:R4  first operand, four 64-bit words, R1 least significant.
     //   R5:R6:R7:R8  second operand, four 64-bit words, R5 least significant.
     // Output:
     //   c0..c7       the eight product words, c0 least significant.
     // Clobbers:
     //   R0 (low-half scratch), R1 (reused as scratch from the second row on),
     //   R26, R27, R29 (partial-product words) and the condition flags.
     //   R2-R8 are only read.
     //
     // Each row multiplies one word of the first operand by R5..R8. MUL yields
     // the low and UMULH the high 64 bits of a word product; the ADDS/ADCS chain
     // stitches them into a five-word partial product, and "ADCS ZR, x" folds the
     // final carry into its top word. Rows after the first are then added into
     // the c registers one word position higher than the previous row.
     // MUL and UMULH are interleaved with the carry chains, which relies on them
     // leaving the condition flags untouched.
     //
     // Callers should pass c registers that are distinct from each other and from
     // R0-R8, R26, R27 and R29: several of those are still read, or reused as
     // scratch, after c registers have been written.
     //
     // NOTE(review): the Go assembler guide lists R27 as reserved by the compiler
     // and linker and R29 as the frame pointer on arm64 - confirm that using them
     // as scratch is acceptable in every function that expands this macro.
     1  #define mul(c0,c1,c2,c3,c4,c5,c6,c7) \
     2  	MUL R1, R5, c0 \
     3  	UMULH R1, R5, c1 \
     4  	MUL R1, R6, R0 \
     5  	ADDS R0, c1 \
     6  	UMULH R1, R6, c2 \
     7  	MUL R1, R7, R0 \
     8  	ADCS R0, c2 \
     9  	UMULH R1, R7, c3 \
    10  	MUL R1, R8, R0 \
    11  	ADCS R0, c3 \
    12  	UMULH R1, R8, c4 \
    13  	ADCS ZR, c4 \
    14  	\ // Row 1: R2 * (R5..R8) -> R1:R26:R27:R29:c5; low four words added into c1..c4, carry folded into c5.
    15  	MUL R2, R5, R1 \
    16  	UMULH R2, R5, R26 \
    17  	MUL R2, R6, R0 \
    18  	ADDS R0, R26 \
    19  	UMULH R2, R6, R27 \
    20  	MUL R2, R7, R0 \
    21  	ADCS R0, R27 \
    22  	UMULH R2, R7, R29 \
    23  	MUL R2, R8, R0 \
    24  	ADCS R0, R29 \
    25  	UMULH R2, R8, c5 \
    26  	ADCS ZR, c5 \
    27  	ADDS R1, c1 \
    28  	ADCS R26, c2 \
    29  	ADCS R27, c3 \
    30  	ADCS R29, c4 \
    31  	ADCS  ZR, c5 \
    32  	\ // Row 2: R3 * (R5..R8) -> R1:R26:R27:R29:c6; low four words added into c2..c5, carry folded into c6.
    33  	MUL R3, R5, R1 \
    34  	UMULH R3, R5, R26 \
    35  	MUL R3, R6, R0 \
    36  	ADDS R0, R26 \
    37  	UMULH R3, R6, R27 \
    38  	MUL R3, R7, R0 \
    39  	ADCS R0, R27 \
    40  	UMULH R3, R7, R29 \
    41  	MUL R3, R8, R0 \
    42  	ADCS R0, R29 \
    43  	UMULH R3, R8, c6 \
    44  	ADCS ZR, c6 \
    45  	ADDS R1, c2 \
    46  	ADCS R26, c3 \
    47  	ADCS R27, c4 \
    48  	ADCS R29, c5 \
    49  	ADCS  ZR, c6 \
    50  	\ // Row 3: R4 * (R5..R8) -> R1:R26:R27:R29:c7; low four words added into c3..c6, carry folded into c7.
    51  	MUL R4, R5, R1 \
    52  	UMULH R4, R5, R26 \
    53  	MUL R4, R6, R0 \
    54  	ADDS R0, R26 \
    55  	UMULH R4, R6, R27 \
    56  	MUL R4, R7, R0 \
    57  	ADCS R0, R27 \
    58  	UMULH R4, R7, R29 \
    59  	MUL R4, R8, R0 \
    60  	ADCS R0, R29 \
    61  	UMULH R4, R8, c7 \
    62  	ADCS ZR, c7 \
    63  	ADDS R1, c3 \
    64  	ADCS R26, c4 \
    65  	ADCS R27, c5 \
    66  	ADCS R29, c6 \
    67  	ADCS  ZR, c7
    68  
    // gfpReduce(): reduce the 512-bit value T held in R9..R16 (R9 least
    // significant) to four words in R1:R2:R3:R4 (R1 least significant).
    //
    // The steps follow the shape of a Montgomery reduction with R = 2^256:
    //   1. m = (T * N') mod R, where N' is the four words loaded from ·np.
    //   2. Compute m * N with mul(), N being whatever loadModulus places in R5..R8.
    //   3. Add T to m * N and keep the upper four words of the sum.
    //   4. Subtract R5:R6:R7:R8 once if that subtraction does not borrow.
    //
    // loadModulus and ·np are defined outside this file; presumably they supply
    // the field modulus and its Montgomery constant - verify at their definitions.
    //
    // Clobbers: R0-R8, R10-R13, R17, R19-R27, R29 and the condition flags
    // (R5-R8 through loadModulus, R26/R27/R29 through mul).
    // R9 and R14-R16 are only read.
    //
    // NOTE(review): R0 collects the carry out of the 512-bit addition but is not
    // read again in this macro; the final selection depends only on the borrow of
    // the trial subtraction. Presumably the upper half of the sum always fits in
    // 256 bits for in-range inputs - confirm, or document the bound on T.
    69  #define gfpReduce() \
    70  	\ // m = (T * N') mod R with R = 2^256, stored in R1:R2:R3:R4. Only R9..R12 (low half of T) and the low words of each product are needed.
    71  	MOVD ·np+0(SB), R17 \
    72  	MOVD ·np+8(SB), R25 \
    73  	MOVD ·np+16(SB), R19 \
    74  	MOVD ·np+24(SB), R20 \
    75  	\ // Row 0: R9 * N'[0..3], truncated to four words -> R1:R2:R3:R4.
    76  	MUL R9, R17, R1 \
    77  	UMULH R9, R17, R2 \
    78  	MUL R9, R25, R0 \
    79  	ADDS R0, R2 \
    80  	UMULH R9, R25, R3 \
    81  	MUL R9, R19, R0 \
    82  	ADCS R0, R3 \
    83  	UMULH R9, R19, R4 \
    84  	MUL R9, R20, R0 \
    85  	ADCS R0, R4 \
    86  	\ // Row 1: R10 * N'[0..2], truncated to three words (R21:R22:R23), added into R2:R3:R4.
    87  	MUL R10, R17, R21 \
    88  	UMULH R10, R17, R22 \
    89  	MUL R10, R25, R0 \
    90  	ADDS R0, R22 \
    91  	UMULH R10, R25, R23 \
    92  	MUL R10, R19, R0 \
    93  	ADCS R0, R23 \
    94  	ADDS R21, R2 \
    95  	ADCS R22, R3 \
    96  	ADCS R23, R4 \
    97  	\ // Row 2: R11 * N'[0..1], truncated to two words (R21:R22), added into R3:R4.
    98  	MUL R11, R17, R21 \
    99  	UMULH R11, R17, R22 \
   100  	MUL R11, R25, R0 \
   101  	ADDS R0, R22 \
   102  	ADDS R21, R3 \
   103  	ADCS R22, R4 \
   104  	\ // Row 3: low word of R12 * N'[0], added into R4.
   105  	MUL R12, R17, R21 \
   106  	ADDS R21, R4 \
   107  	\
   108  	\ // m * N: m is in R1..R4, N in R5..R8; the 512-bit product goes to R17:R25:R19:R20:R21:R22:R23:R24 (N' is no longer needed).
   109  	loadModulus(R5,R6,R7,R8) \
   110  	mul(R17,R25,R19,R20,R21,R22,R23,R24) \
   111  	\
   112  	\ // Add the 512-bit intermediate to m*N. The low four sums are not read again (only their carry matters); R21..R24 keep the upper half and R0 the carry out.
   113  	MOVD  ZR, R0 \
   114  	ADDS  R9, R17 \
   115  	ADCS R10, R25 \
   116  	ADCS R11, R19 \
   117  	ADCS R12, R20 \
   118  	ADCS R13, R21 \
   119  	ADCS R14, R22 \
   120  	ADCS R15, R23 \
   121  	ADCS R16, R24 \
   122  	ADCS  ZR, R0 \
   123  	\
   124  	\ // Our output is R21:R22:R23:R24. Reduce mod p if necessary: trial-subtract R5:R6:R7:R8 into R10:R11:R12:R13.
   125  	SUBS R5, R21, R10 \
   126  	SBCS R6, R22, R11 \
   127  	SBCS R7, R23, R12 \
   128  	SBCS R8, R24, R13 \
   129  	\ // CS (no borrow) selects the subtracted words, otherwise the unsubtracted ones; result in R1:R2:R3:R4.
   130  	CSEL CS, R10, R21, R1 \
   131  	CSEL CS, R11, R22, R2 \
   132  	CSEL CS, R12, R23, R3 \
   133  	CSEL CS, R13, R24, R4