github.com/hxx258456/ccgo@v0.0.5-0.20230213014102-48b35f46f66f/sm3/sm3block_arm64.s (about)

     1  #include "textflag.h"
     2  
     3  #define SI R0                        // pointer to the current 64-byte input block (loaded from p_base)
     4  #define DI R1                        // end-of-input pointer: SI plus the length rounded down to 64
     5  #define BP R2                        // base of the message-schedule words W kept in the stack frame (copy of RSP)
     6  #define AX R3                        // scratch; holds Wt+4 after a message-schedule macro
     7  #define BX R4                        // scratch; holds ss1, later tt2
     8  #define CX R5                        // scratch; holds tt1
     9  #define DX R6                        // scratch; also holds len(p) during function setup
    10  #define hlp0 R7                      // helper; holds Wt inside the tt1/tt2 macros
    11  #define hlp1 R9                      // helper; holds the dig pointer for the whole function
    12  
    13  // MSGSCHEDULE01: Wt+4 = Mt+4; for 0 <= t <= 11. Copies input word index+4 from SI to W[index+4] at BP with its bytes reversed; AX is left holding Wt+4.
    14  #define MSGSCHEDULE01(index) \
    15  	MOVW	((index+4)*4)(SI), AX; \       // AX = 32-bit input word Mt+4
    16  	REVW	AX, AX; \                      // reverse the byte order of the word
    17  	MOVW	AX, ((index+4)*4)(BP)          // W[t+4] = AX
    18  
    19  // MSGSCHEDULE1: expand one message-schedule word; leaves Wt+4 in AX and in W[index+4]. Clobbers BX and CX. x = Wt-12 XOR Wt-5 XOR ROTL(15, Wt+1)
    20  // p1(x) = x XOR ROTL(15, x) XOR ROTL(23, x)
    21  // Wt+4 = p1(x) XOR ROTL(7, Wt-9) XOR Wt-2
    22  // for 12 <= t <= 63 (each ROTL by n is written as RORW by 32-n)
    23  #define MSGSCHEDULE1(index) \
    24    MOVW	((index+1)*4)(BP), AX; \         // AX = Wt+1
    25    RORW  $17, AX; \                     // AX = ROTL(15, Wt+1)
    26    MOVW	((index-12)*4)(BP), BX; \        // BX = Wt-12
    27    EORW  BX, AX; \                      // AX = Wt-12 xor ROTL(15, Wt+1)
    28    MOVW	((index-5)*4)(BP), BX; \         // BX = Wt-5
    29    EORW  BX, AX; \                      // AX = x
    30    RORW  $17, AX, BX; \                 // BX = ROTL(15, x)
    31    RORW  $9, AX, CX; \                  // CX = ROTL(23, x)
    32    EORW  BX, AX; \                      // AX = x xor ROTL(15, x)
    33    EORW  CX, AX; \                      // AX = x xor ROTL(15, x) xor ROTL(23, x) = p1(x)
    34    MOVW	((index-9)*4)(BP), BX; \         // BX = Wt-9
    35    RORW  $25, BX; \                     // BX = ROTL(7, Wt-9)
    36    MOVW	((index-2)*4)(BP), CX; \         // CX = Wt-2
    37    EORW  BX, AX; \                      // AX = p1(x) xor ROTL(7, Wt-9)
    38    EORW  CX, AX; \                      // AX = p1(x) xor ROTL(7, Wt-9) xor Wt-2 = Wt+4
    39    MOVW  AX, ((index+4)*4)(BP)          // W[t+4] = AX
    40  
    41  // SM3SS1: calculate ss1 in BX; reads a and e without modifying them.
    42  // x = ROTL(12, a) + e + const (const is added as an immediate; every call site passes its own per-round value)
    43  // ret = ROTL(7, x)
    44  #define SM3SS1(const, a, e) \
    45    RORW  $20, a, BX; \                  // BX = ROTL(12, a)
    46    ADDW  e, BX; \                       // BX = ROTL(12, a) + e
    47    ADDW  $const, BX; \                  // BX = x
    48    RORW  $25, BX                        // BX = ROTL(7, x) = ss1
    49  
    50  // SM3TT10: calculate tt1 in CX (XOR form). Expects ss1 in BX and Wt+4 in AX; overwrites AX and DX and leaves Wt in hlp0.
    51  // ret = (a XOR b XOR c) + d + (ROTL(12, a) XOR ss1) + (Wt XOR Wt+4)
    52  #define SM3TT10(index, a, b, c, d) \  
    53    EORW a, b, DX; \                   // a XOR b
    54    EORW c, DX; \                      // (a XOR b XOR c)
    55    ADDW d, DX; \                      // (a XOR b XOR c) + d
    56    MOVW ((index)*4)(BP), hlp0; \      // Wt
    57    EORW hlp0, AX; \                   // Wt XOR Wt+4
    58    ADDW AX, DX;  \                    // (a XOR b XOR c) + d + (Wt XOR Wt+4)
    59    RORW $20, a, CX; \                 // ROTL(12, a)
    60    EORW BX, CX; \                     // ROTL(12, a) XOR ss1
    61    ADDW DX, CX                        // tt1 = (a XOR b XOR c) + d + (Wt XOR Wt+4) + (ROTL(12, a) XOR ss1)
    62  
    63  // SM3TT20: calculate tt2 in BX (XOR form). Expects ss1 in BX and Wt in hlp0; overwrites hlp0.
    64  // ret = (e XOR f XOR g) + h + ss1 + Wt
    65  #define SM3TT20(e, f, g, h) \  
    66    ADDW h, hlp0; \                    // Wt + h
    67    ADDW BX, hlp0; \                   // Wt + h + ss1 (last use of ss1 before BX is overwritten)
    68    EORW e, f, BX; \                   // e XOR f
    69    EORW g, BX; \                      // e XOR f XOR g
    70    ADDW hlp0, BX                      // tt2 = (e XOR f XOR g) + Wt + h + ss1
    71  
    72  // SM3TT11: calculate tt1 in CX (AND/OR form). Expects ss1 in BX and Wt+4 in AX; overwrites AX and DX and leaves Wt in hlp0.
    73  // ret = ((a AND b) OR (a AND c) OR (b AND c)) + d + (ROTL(12, a) XOR ss1) + (Wt XOR Wt+4)
    74  #define SM3TT11(index, a, b, c, d) \  
    75    ANDW a, b, DX; \                   // a AND b
    76    ANDW a, c, CX; \                   // a AND c
    77    ORRW  DX, CX; \                    // (a AND b) OR (a AND c)
    78    ANDW b, c, DX; \                   // b AND c
    79    ORRW  CX, DX; \                    // (a AND b) OR (a AND c) OR (b AND c)
    80    ADDW d, DX; \                      // ((a AND b) OR (a AND c) OR (b AND c)) + d
    81    RORW $20, a, CX; \                 // ROTL(12, a)
    82    EORW BX, CX; \                     // ROTL(12, a) XOR ss1
    83    ADDW DX, CX; \                     // ((a AND b) OR (a AND c) OR (b AND c)) + d + (ROTL(12, a) XOR ss1)
    84    MOVW ((index)*4)(BP), hlp0; \      // Wt
    85    EORW hlp0, AX; \                   // Wt XOR Wt+4
    86    ADDW AX, CX                        // tt1 = previous sum + (Wt XOR Wt+4)
    87  
    88  // SM3TT21: calculate tt2 in BX (AND/OR form). Expects ss1 in BX and Wt in hlp0; overwrites DX and hlp0.
    89  // ret = ((e AND f) OR (NOT(e) AND g)) + h + ss1 + Wt
    90  #define SM3TT21(e, f, g, h) \  
    91    ADDW h, hlp0; \                    // Wt + h
    92    ADDW BX, hlp0; \                   // h + ss1 + Wt (last use of ss1 before BX is overwritten)
    93    ANDW e, f, DX; \                   // e AND f
    94    MVNW e, BX; \                      // NOT(e)
    95    ANDW g, BX; \                      // NOT(e) AND g
    96    ORRW  DX, BX; \                    // (e AND f) OR (NOT(e) AND g)
    97    ADDW hlp0, BX                      // tt2 = ((e AND f) OR (NOT(e) AND g)) + h + ss1 + Wt
    98  
    99  #define COPYRESULT(b, d, f, h) \     // finish a round: expects tt1 in CX and tt2 in BX; overwrites BX and CX
   100    RORW $23, b; \                     // b = ROTL(9, b)
   101    MOVW CX, h; \                      // h = tt1 (the next round is invoked with this register as a)
   102    RORW $13, f; \                     // f = ROTL(19, f)
   103    RORW $23, BX, CX; \                // CX = ROTL(9, tt2)
   104    EORW BX, CX; \                     // tt2 XOR ROTL(9, tt2)
   105    RORW $15, BX; \                    // BX = ROTL(17, tt2)
   106    EORW BX, CX; \                     // tt2 XOR ROTL(9, tt2) XOR ROTL(17, tt2)
   107    MOVW CX, d                         // d = tt2 XOR ROTL(9, tt2) XOR ROTL(17, tt2) (the next round is invoked with this register as e)
   108  
   109  #define SM3ROUND0(index, const, a, b, c, d, e, f, g, h) \  // one round whose Wt+4 is copied from the input (invoked for index 0..11)
   110    MSGSCHEDULE01(index); \            // W[index+4] from input; AX = Wt+4
   111    SM3SS1(const, a, e); \             // BX = ss1
   112    SM3TT10(index, a, b, c, d); \      // CX = tt1 (XOR form); hlp0 = Wt
   113    SM3TT20(e, f, g, h); \             // BX = tt2 (XOR form)
   114    COPYRESULT(b, d, f, h)             // update b, d, f, h in place
   115  
   116  #define SM3ROUND1(index, const, a, b, c, d, e, f, g, h) \  // one round with an expanded Wt+4 and XOR-form tt1/tt2 (invoked for index 12..15)
   117    MSGSCHEDULE1(index); \             // W[index+4] expanded from earlier W; AX = Wt+4
   118    SM3SS1(const, a, e); \             // BX = ss1
   119    SM3TT10(index, a, b, c, d); \      // CX = tt1 (XOR form); hlp0 = Wt
   120    SM3TT20(e, f, g, h); \             // BX = tt2 (XOR form)
   121    COPYRESULT(b, d, f, h)             // update b, d, f, h in place
   122  
   123  #define SM3ROUND2(index, const, a, b, c, d, e, f, g, h) \  // one round with an expanded Wt+4 and AND/OR-form tt1/tt2 (invoked for index 16..63)
   124    MSGSCHEDULE1(index); \             // W[index+4] expanded from earlier W; AX = Wt+4
   125    SM3SS1(const, a, e); \             // BX = ss1
   126    SM3TT11(index, a, b, c, d); \      // CX = tt1 (AND/OR form); hlp0 = Wt
   127    SM3TT21(e, f, g, h); \             // BX = tt2 (AND/OR form)
   128    COPYRESULT(b, d, f, h)             // update b, d, f, h in place
   129  
   130  // func blockARM64(dig *digest, p []byte) -- runs the 64 rounds over every full 64-byte block of p and updates the eight 32-bit words at the start of *dig; trailing bytes beyond a multiple of 64 are ignored.
   131  TEXT ·blockARM64(SB), 0, $272-32      // 272-byte frame = 68 32-bit W words (rounds store up to W[67]); 32 bytes of arguments
   132    MOVD dig+0(FP), hlp1                 // hlp1 = dig
   133    MOVD p_base+8(FP), SI                // SI = pointer to the first byte of p
   134    MOVD p_len+16(FP), DX                // DX = len(p)
   135    MOVD RSP, BP                         // BP = base of the W words in the stack frame
   136  
   137    AND	$~63, DX                         // round the length down to a multiple of 64
   138    CBZ	DX, end                          // no full block: return without touching *dig
   139  
   140    ADD SI, DX, DI                       // DI = address just past the last full block
   141  
   142    LDPW	(0*8)(hlp1), (R19, R20)         // load the eight 32-bit state words from offsets 0..28 of *dig into R19..R26
   143    LDPW	(1*8)(hlp1), (R21, R22)
   144    LDPW	(2*8)(hlp1), (R23, R24)
   145    LDPW	(3*8)(hlp1), (R25, R26)
   146  
   147  loop:
   148    MOVW  R19, R10                       // keep a copy of the incoming state in R10..R17 for the XOR after the rounds
   149    MOVW  R20, R11
   150    MOVW  R21, R12
   151    MOVW  R22, R13
   152    MOVW  R23, R14
   153    MOVW  R24, R15
   154    MOVW  R25, R16
   155    MOVW  R26, R17
   156  
   157    // Wt = Mt; for 0 <= t <= 3 (each input word is byte-reversed before being stored to W)
   158    LDPW	(0*8)(SI), (AX, BX)
   159    REVW	AX, AX
   160    REVW	BX, BX
   161    STPW	(AX, BX), (0*8)(BP)
   162  
   163    LDPW	(1*8)(SI), (CX, DX)
   164    REVW	CX, CX
   165    REVW	DX, DX
   166    STPW	(CX, DX), (1*8)(BP)
   167  
   168    SM3ROUND0(0, 0x79cc4519, R19, R20, R21, R22, R23, R24, R25, R26)  // rounds 0..11: W[t+4] comes from the input; the register list rotates by one each round instead of moving values
   169    SM3ROUND0(1, 0xf3988a32, R26, R19, R20, R21, R22, R23, R24, R25)
   170    SM3ROUND0(2, 0xe7311465, R25, R26, R19, R20, R21, R22, R23, R24)
   171    SM3ROUND0(3, 0xce6228cb, R24, R25, R26, R19, R20, R21, R22, R23)
   172    SM3ROUND0(4, 0x9cc45197, R23, R24, R25, R26, R19, R20, R21, R22)
   173    SM3ROUND0(5, 0x3988a32f, R22, R23, R24, R25, R26, R19, R20, R21)
   174    SM3ROUND0(6, 0x7311465e, R21, R22, R23, R24, R25, R26, R19, R20)
   175    SM3ROUND0(7, 0xe6228cbc, R20, R21, R22, R23, R24, R25, R26, R19)
   176    SM3ROUND0(8, 0xcc451979, R19, R20, R21, R22, R23, R24, R25, R26)
   177    SM3ROUND0(9, 0x988a32f3, R26, R19, R20, R21, R22, R23, R24, R25)
   178    SM3ROUND0(10, 0x311465e7, R25, R26, R19, R20, R21, R22, R23, R24)
   179    SM3ROUND0(11, 0x6228cbce, R24, R25, R26, R19, R20, R21, R22, R23)
   180    
   181    SM3ROUND1(12, 0xc451979c, R23, R24, R25, R26, R19, R20, R21, R22)  // rounds 12..15: W[t+4] is expanded from earlier W words; tt1/tt2 still use the XOR form
   182    SM3ROUND1(13, 0x88a32f39, R22, R23, R24, R25, R26, R19, R20, R21)
   183    SM3ROUND1(14, 0x11465e73, R21, R22, R23, R24, R25, R26, R19, R20)
   184    SM3ROUND1(15, 0x228cbce6, R20, R21, R22, R23, R24, R25, R26, R19)
   185    
   186    SM3ROUND2(16, 0x9d8a7a87, R19, R20, R21, R22, R23, R24, R25, R26)  // rounds 16..63: tt1/tt2 use the AND/OR form
   187    SM3ROUND2(17, 0x3b14f50f, R26, R19, R20, R21, R22, R23, R24, R25)
   188    SM3ROUND2(18, 0x7629ea1e, R25, R26, R19, R20, R21, R22, R23, R24)
   189    SM3ROUND2(19, 0xec53d43c, R24, R25, R26, R19, R20, R21, R22, R23)
   190    SM3ROUND2(20, 0xd8a7a879, R23, R24, R25, R26, R19, R20, R21, R22)
   191    SM3ROUND2(21, 0xb14f50f3, R22, R23, R24, R25, R26, R19, R20, R21)
   192    SM3ROUND2(22, 0x629ea1e7, R21, R22, R23, R24, R25, R26, R19, R20)
   193    SM3ROUND2(23, 0xc53d43ce, R20, R21, R22, R23, R24, R25, R26, R19)
   194    SM3ROUND2(24, 0x8a7a879d, R19, R20, R21, R22, R23, R24, R25, R26)
   195    SM3ROUND2(25, 0x14f50f3b, R26, R19, R20, R21, R22, R23, R24, R25)
   196    SM3ROUND2(26, 0x29ea1e76, R25, R26, R19, R20, R21, R22, R23, R24)
   197    SM3ROUND2(27, 0x53d43cec, R24, R25, R26, R19, R20, R21, R22, R23)
   198    SM3ROUND2(28, 0xa7a879d8, R23, R24, R25, R26, R19, R20, R21, R22)
   199    SM3ROUND2(29, 0x4f50f3b1, R22, R23, R24, R25, R26, R19, R20, R21)
   200    SM3ROUND2(30, 0x9ea1e762, R21, R22, R23, R24, R25, R26, R19, R20)
   201    SM3ROUND2(31, 0x3d43cec5, R20, R21, R22, R23, R24, R25, R26, R19)
   202    SM3ROUND2(32, 0x7a879d8a, R19, R20, R21, R22, R23, R24, R25, R26)
   203    SM3ROUND2(33, 0xf50f3b14, R26, R19, R20, R21, R22, R23, R24, R25)
   204    SM3ROUND2(34, 0xea1e7629, R25, R26, R19, R20, R21, R22, R23, R24)
   205    SM3ROUND2(35, 0xd43cec53, R24, R25, R26, R19, R20, R21, R22, R23)
   206    SM3ROUND2(36, 0xa879d8a7, R23, R24, R25, R26, R19, R20, R21, R22)
   207    SM3ROUND2(37, 0x50f3b14f, R22, R23, R24, R25, R26, R19, R20, R21)
   208    SM3ROUND2(38, 0xa1e7629e, R21, R22, R23, R24, R25, R26, R19, R20)
   209    SM3ROUND2(39, 0x43cec53d, R20, R21, R22, R23, R24, R25, R26, R19)
   210    SM3ROUND2(40, 0x879d8a7a, R19, R20, R21, R22, R23, R24, R25, R26)
   211    SM3ROUND2(41, 0xf3b14f5, R26, R19, R20, R21, R22, R23, R24, R25)   // constant is 0x0f3b14f5 written without its leading zero
   212    SM3ROUND2(42, 0x1e7629ea, R25, R26, R19, R20, R21, R22, R23, R24)
   213    SM3ROUND2(43, 0x3cec53d4, R24, R25, R26, R19, R20, R21, R22, R23)
   214    SM3ROUND2(44, 0x79d8a7a8, R23, R24, R25, R26, R19, R20, R21, R22)
   215    SM3ROUND2(45, 0xf3b14f50, R22, R23, R24, R25, R26, R19, R20, R21)
   216    SM3ROUND2(46, 0xe7629ea1, R21, R22, R23, R24, R25, R26, R19, R20)
   217    SM3ROUND2(47, 0xcec53d43, R20, R21, R22, R23, R24, R25, R26, R19)
   218    SM3ROUND2(48, 0x9d8a7a87, R19, R20, R21, R22, R23, R24, R25, R26)
   219    SM3ROUND2(49, 0x3b14f50f, R26, R19, R20, R21, R22, R23, R24, R25)
   220    SM3ROUND2(50, 0x7629ea1e, R25, R26, R19, R20, R21, R22, R23, R24)
   221    SM3ROUND2(51, 0xec53d43c, R24, R25, R26, R19, R20, R21, R22, R23)
   222    SM3ROUND2(52, 0xd8a7a879, R23, R24, R25, R26, R19, R20, R21, R22)
   223    SM3ROUND2(53, 0xb14f50f3, R22, R23, R24, R25, R26, R19, R20, R21)
   224    SM3ROUND2(54, 0x629ea1e7, R21, R22, R23, R24, R25, R26, R19, R20)
   225    SM3ROUND2(55, 0xc53d43ce, R20, R21, R22, R23, R24, R25, R26, R19)
   226    SM3ROUND2(56, 0x8a7a879d, R19, R20, R21, R22, R23, R24, R25, R26)
   227    SM3ROUND2(57, 0x14f50f3b, R26, R19, R20, R21, R22, R23, R24, R25)
   228    SM3ROUND2(58, 0x29ea1e76, R25, R26, R19, R20, R21, R22, R23, R24)
   229    SM3ROUND2(59, 0x53d43cec, R24, R25, R26, R19, R20, R21, R22, R23)
   230    SM3ROUND2(60, 0xa7a879d8, R23, R24, R25, R26, R19, R20, R21, R22)
   231    SM3ROUND2(61, 0x4f50f3b1, R22, R23, R24, R25, R26, R19, R20, R21)
   232    SM3ROUND2(62, 0x9ea1e762, R21, R22, R23, R24, R25, R26, R19, R20)
   233    SM3ROUND2(63, 0x3d43cec5, R20, R21, R22, R23, R24, R25, R26, R19)
   234  
   235    EORW R10, R19  // H0 = a XOR H0 (after 64 rounds the register list has rotated back to R19 = a)
   236    EORW R11, R20  // H1 = b XOR H1
   237    EORW R12, R21  // H2 = c XOR H2
   238    EORW R13, R22  // H3 = d XOR H3
   239    EORW R14, R23  // H4 = e XOR H4
   240    EORW R15, R24  // H5 = f XOR H5
   241    EORW R16, R25  // H6 = g XOR H6
   242    EORW R17, R26  // H7 = h XOR H7
   243   
   244    ADD $64, SI    // advance to the next 64-byte block
   245    CMP SI, DI
   246    BNE	loop       // repeat until SI reaches the end pointer
   247  
   248    STPW	(R19, R20), (0*8)(hlp1)         // write the updated state words back to *dig
   249    STPW	(R21, R22), (1*8)(hlp1)
   250    STPW	(R23, R24), (2*8)(hlp1)
   251    STPW	(R25, R26), (3*8)(hlp1)
   252  
   253  end:	
   254    RET