github.com/emmansun/gmsm@v0.29.1/sm3/sm3block_amd64.s (about)

     1  //go:build !purego
     2  
     3  #include "textflag.h"
     4  
     5  #include "sm3_const_asm.s"
     6  
     7  // xorm (mem), reg
     8  // XOR reg into mem in place, then reload reg from mem, so that both
        // operands end up holding (old mem XOR old reg).
        // P1 is the memory operand and P2 the register, as in the header line above.
     9  #define xorm(P1, P2) \
    10  	XORL P2, P1; \ // P1 ^= P2
    11  	MOVL P1, P2    // P2 = P1
    12  
    13  #define a R8  // working variable a, loaded from state word H0 in blockAMD64
    14  #define b R9  // working variable b, loaded from H1
    15  #define c R10 // working variable c, loaded from H2
    16  #define d R11 // working variable d, loaded from H3
    17  #define e R12 // working variable e, loaded from H4
    18  #define f R13 // working variable f, loaded from H5
    19  #define g R14 // working variable g, loaded from H6
    20  #define h DI  // working variable h, loaded from H7 (DI is also used as scratch before that load)
    21  
    22  // Wt = Mt; for 0 <= t <= 3
        // Copies 32-bit message word `index` from the input block at SI into the
        // schedule array at BP, reversing its byte order on the way.
        // Clobbers AX.
    23  #define MSGSCHEDULE0(index) \
    24  	MOVL	(index*4)(SI), AX; \ // AX = message word as stored in the input
    25  	BSWAPL	AX; \ // reverse the byte order
    26  	MOVL	AX, (index*4)(BP)    // Wt = AX
    27  
    28  // Wt+4 = Mt+4; for 0 <= t <= 11
        // Same copy as MSGSCHEDULE0, but for the word four positions ahead of
        // round `index`. The byte-swapped word is left in AX, where the tt1 macro
        // that follows in SM3ROUND0 reads it as Wt+4.
    29  #define MSGSCHEDULE01(index) \
    30  	MOVL	((index+4)*4)(SI), AX; \ // AX = message word index+4 as stored in the input
    31  	BSWAPL	AX; \ // reverse the byte order
    32  	MOVL	AX, ((index+4)*4)(BP)    // Wt+4 = AX (AX stays live)
    33  
    34  // x = Wt-12 XOR Wt-5 XOR ROTL(15, Wt+1)
    35  // p1(x) = x XOR ROTL(15, x) XOR ROTL(23, x)
    36  // Wt+4 = p1(x) XOR ROTL(7, Wt-9) XOR Wt-2
    37  // for 12 <= t <= 63
        // Expands the schedule word four positions ahead of round `index`, reading
        // earlier words from the array at BP and storing the new word back into it.
        // The new word is also left in AX for the tt1 macro; BX is clobbered.
    38  #define MSGSCHEDULE1(index) \
    39  	MOVL	((index+1)*4)(BP), AX; \ // AX = Wt+1
    40  	ROLL  $15, AX; \ // AX = ROTL(15, Wt+1)
    41  	MOVL	((index-12)*4)(BP), BX; \
    42  	XORL  BX, AX; \ // AX ^= Wt-12
    43  	MOVL	((index-5)*4)(BP), BX; \
    44  	XORL  BX, AX; \ // AX = x
    45  	MOVL  AX, BX; \
    46  	ROLL  $15, BX; \ // BX = ROTL(15, x)
    47  	XORL  BX, AX; \ // AX = x XOR ROTL(15, x)
    48  	ROLL  $8, BX; \ // BX = ROTL(23, x): 15 bits already applied, 8 more
    49  	XORL  BX, AX; \ // AX = p1(x)
    50  	MOVL	((index-9)*4)(BP), BX; \
    51  	ROLL  $7, BX; \ // BX = ROTL(7, Wt-9)
    52  	XORL  BX, AX; \
    53  	MOVL	((index-2)*4)(BP), BX; \
    54  	XORL  BX, AX; \ // AX = Wt+4
    55  	MOVL  AX, ((index+4)*4)(BP)
    56  
    57  // Calculate ss1 in BX
    58  // x = ROTL(12, a) + e + ROTL(index, const)
    59  // ret = ROTL(7, x)
        // The macro adds $const as an immediate without rotating it, so the
        // ROTL(index, ...) in the formula above has to be baked into the constant
        // already. NOTE(review): the constants are defined outside this file
        // (sm3_const_asm.s) - confirm they are pre-rotated.
        // Only BX is written; a and e are left unchanged.
    60  #define SM3SS1(const, a, e) \
    61  	MOVL  a, BX; \
    62  	ROLL  $12, BX; \ // BX = ROTL(12, a)
    63  	ADDL  e, BX; \ // BX += e
    64  	ADDL  $const, BX; \ // BX += round constant
    65  	ROLL  $7, BX
    66  
    67  // Calculate tt1 in CX
    68  // ret = (a XOR b XOR c) + d + (ROTL(12, a) XOR ss1) + (Wt XOR Wt+4)
        // On entry AX must hold Wt+4 (left there by the message-schedule macro) and
        // BX must hold ss1 (from SM3SS1). BX is preserved for the tt2 macro;
        // AX and DX are clobbered.
    69  #define SM3TT10(index, a, b, c, d) \  
    70  	MOVL b, DX; \
    71  	XORL a, DX; \
    72  	XORL c, DX; \  // (a XOR b XOR c)
    73  	ADDL d, DX; \   // (a XOR b XOR c) + d 
    74  	MOVL ((index)*4)(BP), CX; \ // Wt
    75  	XORL CX, AX; \ // Wt XOR Wt+4
    76  	ADDL AX, DX;  \ // (a XOR b XOR c) + d + (Wt XOR Wt+4)
    77  	MOVL a, CX; \
    78  	ROLL $12, CX; \
    79  	XORL BX, CX; \ // ROTL(12, a) XOR ss1
    80  	ADDL DX, CX  // (a XOR b XOR c) + d + (Wt XOR Wt+4) + (ROTL(12, a) XOR ss1)
    81  
    82  // Calculate tt2 in BX
    83  // ret = (e XOR f XOR g) + h + ss1 + Wt
        // On entry BX must hold ss1; it is consumed and then overwritten with tt2.
        // CX (tt1 from the preceding macro) is not touched; DX is clobbered.
    84  #define SM3TT20(index, e, f, g, h) \
    85  	MOVL ((index)*4)(BP), DX; \ // Wt
    86  	ADDL h, DX; \   // Wt + h
    87  	ADDL BX, DX; \  // Wt + h + ss1
    88  	MOVL e, BX; \
    89  	XORL f, BX; \  // e XOR f
    90  	XORL g, BX; \  // e XOR f XOR g
    91  	ADDL DX, BX     // (e XOR f XOR g) + Wt + h + ss1
    92  
    93  // Calculate tt1 in CX, used DX
    94  // ret = ((a AND b) OR (a AND c) OR (b AND c)) + d + (ROTL(12, a) XOR ss1) + (Wt XOR Wt+4)
        // The three-way majority is computed as ((a OR b) AND c) OR (a AND b).
        // On entry AX must hold Wt+4 and BX must hold ss1. BX is preserved for the
        // tt2 macro; AX and DX are clobbered.
    95  #define SM3TT11(index, a, b, c, d) \  
    96  	MOVL a, DX; \
    97  	ORL  b, DX; \  // a OR b
    98  	MOVL a, CX; \
    99  	ANDL b, CX; \  // a AND b
   100  	ANDL c, DX; \ // (a OR b) AND c
   101  	ORL  CX, DX; \  // (a AND b) OR (a AND c) OR (b AND c)
   102  	ADDL d, DX; \
   103  	MOVL a, CX; \
   104  	ROLL $12, CX; \
   105  	XORL BX, CX; \ // ROTL(12, a) XOR ss1
   106  	ADDL DX, CX; \  // ((a AND b) OR (a AND c) OR (b AND c)) + d + (ROTL(12, a) XOR ss1)
   107  	MOVL ((index)*4)(BP), DX; \ // Wt
   108  	XORL DX, AX; \  // Wt XOR Wt+4
   109  	ADDL AX, CX
   110  
   111  // Calculate tt2 in BX
   112  // ret = ((e AND f) OR (NOT(e) AND g)) + h + ss1 + Wt
        // The selection function is computed as ((f XOR g) AND e) XOR g, which
        // yields f where a bit of e is set and g where it is clear.
        // On entry BX must hold ss1; it is consumed and then overwritten with tt2.
        // CX (tt1 from the preceding macro) is not touched; DX is clobbered.
   113  #define SM3TT21(index, e, f, g, h) \
   114  	MOVL ((index)*4)(BP), DX; \ // Wt
   115  	ADDL h, DX; \   // Wt + h
   116  	ADDL BX, DX; \  // h + ss1 + Wt
   117  	MOVL f, BX; \   
   118  	XORL g, BX; \ // f XOR g
   119  	ANDL e, BX; \ // (f XOR g) AND e
   120  	XORL g, BX; \ // GG2(e, f, g)
   121  	ADDL DX, BX
   122  
   123  #define COPYRESULT(b, d, f, h) \ // finish a round; expects tt1 in CX and tt2 in BX, clobbers both
   124  	ROLL $9, b; \ // b = ROTL(9, b); this register is passed as c to the next round
   125  	MOVL CX, h; \   // a = tt1 (the register passed as h here is passed as a to the next round)
   126  	ROLL $19, f; \ // f = ROTL(19, f); this register is passed as g to the next round
   127  	MOVL BX, CX; \
   128  	ROLL $9, CX; \
   129  	XORL BX, CX; \  // tt2 XOR ROTL(9, tt2)
   130  	ROLL $17, BX; \
   131  	XORL BX, CX; \  // tt2 XOR ROTL(9, tt2) XOR ROTL(17, tt2)
   132  	MOVL CX, d    // e = tt2 XOR ROTL(9, tt2) XOR ROTL(17, tt2) (the register passed as d here is passed as e to the next round)
   133  
   134  #define SM3ROUND0(index, const, a, b, c, d, e, f, g, h) \ // round used for index 0..11: Wt+4 is copied straight from the input block
   135  	MSGSCHEDULE01(index); \ // store Wt+4, leave it in AX
   136  	SM3SS1(const, a, e); \ // BX = ss1
   137  	SM3TT10(index, a, b, c, d); \ // CX = tt1 (XOR form)
   138  	SM3TT20(index, e, f, g, h); \ // BX = tt2 (XOR form)
   139  	COPYRESULT(b, d, f, h)
   140  
   141  #define SM3ROUND1(index, const, a, b, c, d, e, f, g, h) \ // round used for index 12..15: Wt+4 is expanded from earlier schedule words
   142  	MSGSCHEDULE1(index); \ // compute and store Wt+4, leave it in AX
   143  	SM3SS1(const, a, e); \ // BX = ss1
   144  	SM3TT10(index, a, b, c, d); \ // CX = tt1 (XOR form)
   145  	SM3TT20(index, e, f, g, h); \ // BX = tt2 (XOR form)
   146  	COPYRESULT(b, d, f, h)
   147  
   148  #define SM3ROUND2(index, const, a, b, c, d, e, f, g, h) \ // round used for index 16..63: expanded schedule plus the AND/OR boolean functions
   149  	MSGSCHEDULE1(index); \ // compute and store Wt+4, leave it in AX
   150  	SM3SS1(const, a, e); \ // BX = ss1
   151  	SM3TT11(index, a, b, c, d); \ // CX = tt1 (majority form)
   152  	SM3TT21(index, e, f, g, h); \ // BX = tt2 (select form)
   153  	COPYRESULT(b, d, f, h)
   154  
   155  TEXT ·blockAMD64(SB), 0, $288-32
        	// Folds every complete 64-byte block of the input slice into the eight
        	// 32-bit state words behind the dig pointer. Trailing bytes that do not
        	// fill a block are ignored, and nothing is done for fewer than 64 bytes.
        	//
        	// Arguments (FP-relative): dig+0 state pointer, p_base+8 / p_len+16 input.
        	// Frame (SP-relative):     0..271  W[0..67], the expanded message schedule
        	//                          272     pointer one past the last complete block
        	// Registers: SI = current block; BP = state pointer outside the rounds and
        	// W base inside them; a..h = working variables; AX, BX, CX, DX = scratch.
        	// NOTE(review): BP is used as a plain pointer register here; this relies on
        	// the assembler-generated prologue preserving the caller's BP - confirm.
   156  	MOVQ p_base+8(FP), SI
   157  	MOVQ p_len+16(FP), DX
   158  	SHRQ $6, DX
   159  	SHLQ $6, DX // DX = length rounded down to a multiple of 64
   160  
   161  	LEAQ (SI)(DX*1), DI // DI is scratch here; it only becomes h at the H7 load below
   162  	MOVQ DI, 272(SP) // remember where the last complete block ends
   163  	CMPQ SI, DI
   164  	JEQ  end // no complete block to process
   165  
   166  	MOVQ dig+0(FP), BP
   167  	MOVL (0*4)(BP), a // a = H0
   168  	MOVL (1*4)(BP), b // b = H1
   169  	MOVL (2*4)(BP), c // c = H2
   170  	MOVL (3*4)(BP), d // d = H3
   171  	MOVL (4*4)(BP), e // e = H4
   172  	MOVL (5*4)(BP), f // f = H5
   173  	MOVL (6*4)(BP), g // g = H6
   174  	MOVL (7*4)(BP), h // h = H7
   175  
   176  loop:
   177  	MOVQ SP, BP // BP = base of the W array for the schedule and round macros
   178  
        	// W0..W3; every round then prepares the word four positions ahead.
   179  	MSGSCHEDULE0(0)
   180  	MSGSCHEDULE0(1)
   181  	MSGSCHEDULE0(2)
   182  	MSGSCHEDULE0(3)
   183  
        	// The register arguments rotate by one position per round instead of
        	// moving values between registers; after 8 rounds the order repeats.
   184  	SM3ROUND0(0, T0, a, b, c, d, e, f, g, h)
   185  	SM3ROUND0(1, T1, h, a, b, c, d, e, f, g)
   186  	SM3ROUND0(2, T2, g, h, a, b, c, d, e, f)
   187  	SM3ROUND0(3, T3, f, g, h, a, b, c, d, e)
   188  	SM3ROUND0(4, T4, e, f, g, h, a, b, c, d)
   189  	SM3ROUND0(5, T5, d, e, f, g, h, a, b, c)
   190  	SM3ROUND0(6, T6, c, d, e, f, g, h, a, b)
   191  	SM3ROUND0(7, T7, b, c, d, e, f, g, h, a)
   192  	SM3ROUND0(8, T8, a, b, c, d, e, f, g, h)
   193  	SM3ROUND0(9, T9, h, a, b, c, d, e, f, g)
   194  	SM3ROUND0(10, T10, g, h, a, b, c, d, e, f)
   195  	SM3ROUND0(11, T11, f, g, h, a, b, c, d, e)
   196    
   197  	SM3ROUND1(12, T12, e, f, g, h, a, b, c, d)
   198  	SM3ROUND1(13, T13, d, e, f, g, h, a, b, c)
   199  	SM3ROUND1(14, T14, c, d, e, f, g, h, a, b)
   200  	SM3ROUND1(15, T15, b, c, d, e, f, g, h, a)
   201    
   202  	SM3ROUND2(16, T16, a, b, c, d, e, f, g, h)
   203  	SM3ROUND2(17, T17, h, a, b, c, d, e, f, g)
   204  	SM3ROUND2(18, T18, g, h, a, b, c, d, e, f)
   205  	SM3ROUND2(19, T19, f, g, h, a, b, c, d, e)
   206  	SM3ROUND2(20, T20, e, f, g, h, a, b, c, d)
   207  	SM3ROUND2(21, T21, d, e, f, g, h, a, b, c)
   208  	SM3ROUND2(22, T22, c, d, e, f, g, h, a, b)
   209  	SM3ROUND2(23, T23, b, c, d, e, f, g, h, a)
   210  	SM3ROUND2(24, T24, a, b, c, d, e, f, g, h)
   211  	SM3ROUND2(25, T25, h, a, b, c, d, e, f, g)
   212  	SM3ROUND2(26, T26, g, h, a, b, c, d, e, f)
   213  	SM3ROUND2(27, T27, f, g, h, a, b, c, d, e)
   214  	SM3ROUND2(28, T28, e, f, g, h, a, b, c, d)
   215  	SM3ROUND2(29, T29, d, e, f, g, h, a, b, c)
   216  	SM3ROUND2(30, T30, c, d, e, f, g, h, a, b)
   217  	SM3ROUND2(31, T31, b, c, d, e, f, g, h, a)
   218  	SM3ROUND2(32, T32, a, b, c, d, e, f, g, h)
   219  	SM3ROUND2(33, T33, h, a, b, c, d, e, f, g)
   220  	SM3ROUND2(34, T34, g, h, a, b, c, d, e, f)
   221  	SM3ROUND2(35, T35, f, g, h, a, b, c, d, e)
   222  	SM3ROUND2(36, T36, e, f, g, h, a, b, c, d)
   223  	SM3ROUND2(37, T37, d, e, f, g, h, a, b, c)
   224  	SM3ROUND2(38, T38, c, d, e, f, g, h, a, b)
   225  	SM3ROUND2(39, T39, b, c, d, e, f, g, h, a)
   226  	SM3ROUND2(40, T40, a, b, c, d, e, f, g, h)
   227  	SM3ROUND2(41, T41, h, a, b, c, d, e, f, g)
   228  	SM3ROUND2(42, T42, g, h, a, b, c, d, e, f)
   229  	SM3ROUND2(43, T43, f, g, h, a, b, c, d, e)
   230  	SM3ROUND2(44, T44, e, f, g, h, a, b, c, d)
   231  	SM3ROUND2(45, T45, d, e, f, g, h, a, b, c)
   232  	SM3ROUND2(46, T46, c, d, e, f, g, h, a, b)
   233  	SM3ROUND2(47, T47, b, c, d, e, f, g, h, a)
   234  	SM3ROUND2(48, T48, a, b, c, d, e, f, g, h)
   235  	SM3ROUND2(49, T49, h, a, b, c, d, e, f, g)
   236  	SM3ROUND2(50, T50, g, h, a, b, c, d, e, f)
   237  	SM3ROUND2(51, T51, f, g, h, a, b, c, d, e)
   238  	SM3ROUND2(52, T52, e, f, g, h, a, b, c, d)
   239  	SM3ROUND2(53, T53, d, e, f, g, h, a, b, c)
   240  	SM3ROUND2(54, T54, c, d, e, f, g, h, a, b)
   241  	SM3ROUND2(55, T55, b, c, d, e, f, g, h, a)
   242  	SM3ROUND2(56, T56, a, b, c, d, e, f, g, h)
   243  	SM3ROUND2(57, T57, h, a, b, c, d, e, f, g)
   244  	SM3ROUND2(58, T58, g, h, a, b, c, d, e, f)
   245  	SM3ROUND2(59, T59, f, g, h, a, b, c, d, e)
   246  	SM3ROUND2(60, T60, e, f, g, h, a, b, c, d)
   247  	SM3ROUND2(61, T61, d, e, f, g, h, a, b, c)
   248  	SM3ROUND2(62, T62, c, d, e, f, g, h, a, b)
   249  	SM3ROUND2(63, T63, b, c, d, e, f, g, h, a)
   250  
        	// BP was repurposed as the W base, so fetch the state pointer again. The
        	// argument name must match the one used at entry (and the Go declaration).
   251  	MOVQ dig+0(FP), BP
   252  
        	// XOR the working variables into the stored state and reload them, so the
        	// registers already hold the starting state for the next block.
   253  	xorm(  0(BP), a)
   254  	xorm(  4(BP), b)
   255  	xorm(  8(BP), c)
   256  	xorm( 12(BP), d)
   257  	xorm( 16(BP), e)
   258  	xorm( 20(BP), f)
   259  	xorm( 24(BP), g)
   260  	xorm( 28(BP), h)
   261  
   262  	ADDQ $64, SI // advance to the next input block
   263  	CMPQ SI, 272(SP)
   264  	JB   loop // unsigned compare: more complete blocks remain
   265  
   266  end:
   267  	RET