github.com/insolar/x-crypto@v0.0.0-20191031140942-75fab8a325f6/sha1/sha1block_amd64p32.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "textflag.h"

// SHA-1 block routine. See sha1block.go for Go equivalent.
//
// There are 80 rounds of 4 types:
//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
//
// Each round loads or shuffles the data, then computes a per-round
// function of b, c, d, and then mixes the result into and rotates the
// five registers a, b, c, d, e holding the intermediate results.
//
// The register rotation is implemented by rotating the arguments to
// the round macros instead of by explicit move instructions.
//
// amd64p32 version.
// To ensure safety for Native Client, avoids use of BP and R15
// as well as two-register addressing modes.

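// LOAD fetches big-endian message word `index` from the input block at
// SI, byte-swaps it into host (little-endian) order, and stores it in
// the 16-word message schedule W kept in the stack frame.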
#define LOAD(index) \
	MOVL	(index*4)(SI), R10; \
	BSWAPL	R10; \
	MOVL	R10, (index*4)(SP)

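// SHUFFLE computes the message schedule expansion
//
//	W[i] = rol32(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1)
//
// in a 16-word circular buffer: the &0xf masks make each stack slot
// stand in for every index congruent mod 16, so the initial read of
// (((index)&0xf)*4)(SP) is W[i-16].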
#define SHUFFLE(index) \
	MOVL	(((index)&0xf)*4)(SP), R10; \
	XORL	(((index-3)&0xf)*4)(SP), R10; \
	XORL	(((index-8)&0xf)*4)(SP), R10; \
	XORL	(((index-14)&0xf)*4)(SP), R10; \
	ROLL	$1, R10; \
	MOVL	R10, (((index)&0xf)*4)(SP)

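// FUNC1 is the "Ch" (choose) function for rounds 0-19,
// f = (b & c) | (^b & d), computed via the equivalent identity
// d ^ (b & (c ^ d)); the result is left in R9.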
#define FUNC1(a, b, c, d, e) \
	MOVL	d, R9; \
	XORL	c, R9; \
	ANDL	b, R9; \
	XORL	d, R9

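// FUNC2 is the "Parity" function for rounds 20-39, f = b ^ c ^ d,
// left in R9.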
#define FUNC2(a, b, c, d, e) \
	MOVL	b, R9; \
	XORL	c, R9; \
	XORL	d, R9

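// FUNC3 is the "Maj" (majority) function for rounds 40-59,
// f = (b & c) | (b & d) | (c & d), computed here as
// (b & c) | ((b | c) & d); it uses R8 as scratch and leaves the
// result in R9.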
#define FUNC3(a, b, c, d, e) \
	MOVL	b, R8; \
	ORL	c, R8; \
	ANDL	d, R8; \
	MOVL	b, R9; \
	ANDL	c, R9; \
	ORL	R8, R9

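// Rounds 60-79 reuse the Parity function.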
#define FUNC4 FUNC2

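// MIX folds one round together. In Go terms (cf. sha1block.go), it is
// roughly
//
//	e += bits.RotateLeft32(a, 5) + f + w[i&0xf] + K
//	b = bits.RotateLeft32(b, 30)
//
// where f is the FUNCn result in R9, w[i&0xf] is in R10, and K is the
// per-stage round constant. The updated e then serves as the next
// round's a via the rotated macro arguments.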
#define MIX(a, b, c, d, e, const) \
	ROLL	$30, b; \
	ADDL	R9, e; \
	MOVL	a, R8; \
	ROLL	$5, R8; \
	LEAL	const(e)(R10*1), e; \
	ADDL	R8, e

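// Each ROUNDn macro is LOAD or SHUFFLE, then FUNCn, then MIX with the
// per-stage constant: K = 0x5A827999 (rounds 0-19), 0x6ED9EBA1 (20-39),
// 0x8F1BBCDC (40-59), 0xCA62C1D6 (60-79).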
#define ROUND1(a, b, c, d, e, index) \
	LOAD(index); \
	FUNC1(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0x5A827999)

#define ROUND1x(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC1(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0x5A827999)

#define ROUND2(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC2(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0x6ED9EBA1)

#define ROUND3(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC3(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0x8F1BBCDC)

#define ROUND4(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC4(a, b, c, d, e); \
	MIX(a, b, c, d, e, 0xCA62C1D6)

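// block is declared in the accompanying Go code as
// func block(dig *digest, p []byte). The frame is $64-16: a 64-byte
// local frame holding the 16-word W buffer, and 16 bytes of arguments
// (on amd64p32 pointers are 4 bytes: dig, p_base, p_len, p_cap). The
// SHRQ/SHLQ pair below rounds p_len down to a whole number of 64-byte
// blocks, and DI is set to the end of the input.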
TEXT ·block(SB),NOSPLIT,$64-16
	MOVL	dig+0(FP),	R14
	MOVL	p_base+4(FP),	SI
	MOVL	p_len+8(FP),	DX
	SHRQ	$6,		DX
	SHLQ	$6,		DX

	LEAQ	(SI)(DX*1),	DI
	MOVL	(0*4)(R14),	AX
	MOVL	(1*4)(R14),	BX
	MOVL	(2*4)(R14),	CX
	MOVL	(3*4)(R14),	DX
	MOVL	(4*4)(R14),	R13

	CMPQ	SI,		DI
	JEQ	end

loop:
#define BP R13 /* keep diff from sha1block_amd64.s small */
	ROUND1(AX, BX, CX, DX, BP, 0)
	ROUND1(BP, AX, BX, CX, DX, 1)
	ROUND1(DX, BP, AX, BX, CX, 2)
	ROUND1(CX, DX, BP, AX, BX, 3)
	ROUND1(BX, CX, DX, BP, AX, 4)
	ROUND1(AX, BX, CX, DX, BP, 5)
	ROUND1(BP, AX, BX, CX, DX, 6)
	ROUND1(DX, BP, AX, BX, CX, 7)
	ROUND1(CX, DX, BP, AX, BX, 8)
	ROUND1(BX, CX, DX, BP, AX, 9)
	ROUND1(AX, BX, CX, DX, BP, 10)
	ROUND1(BP, AX, BX, CX, DX, 11)
	ROUND1(DX, BP, AX, BX, CX, 12)
	ROUND1(CX, DX, BP, AX, BX, 13)
	ROUND1(BX, CX, DX, BP, AX, 14)
	ROUND1(AX, BX, CX, DX, BP, 15)

	ROUND1x(BP, AX, BX, CX, DX, 16)
	ROUND1x(DX, BP, AX, BX, CX, 17)
	ROUND1x(CX, DX, BP, AX, BX, 18)
	ROUND1x(BX, CX, DX, BP, AX, 19)

	ROUND2(AX, BX, CX, DX, BP, 20)
	ROUND2(BP, AX, BX, CX, DX, 21)
	ROUND2(DX, BP, AX, BX, CX, 22)
	ROUND2(CX, DX, BP, AX, BX, 23)
	ROUND2(BX, CX, DX, BP, AX, 24)
	ROUND2(AX, BX, CX, DX, BP, 25)
	ROUND2(BP, AX, BX, CX, DX, 26)
	ROUND2(DX, BP, AX, BX, CX, 27)
	ROUND2(CX, DX, BP, AX, BX, 28)
	ROUND2(BX, CX, DX, BP, AX, 29)
	ROUND2(AX, BX, CX, DX, BP, 30)
	ROUND2(BP, AX, BX, CX, DX, 31)
	ROUND2(DX, BP, AX, BX, CX, 32)
	ROUND2(CX, DX, BP, AX, BX, 33)
	ROUND2(BX, CX, DX, BP, AX, 34)
	ROUND2(AX, BX, CX, DX, BP, 35)
	ROUND2(BP, AX, BX, CX, DX, 36)
	ROUND2(DX, BP, AX, BX, CX, 37)
	ROUND2(CX, DX, BP, AX, BX, 38)
	ROUND2(BX, CX, DX, BP, AX, 39)

	ROUND3(AX, BX, CX, DX, BP, 40)
	ROUND3(BP, AX, BX, CX, DX, 41)
	ROUND3(DX, BP, AX, BX, CX, 42)
	ROUND3(CX, DX, BP, AX, BX, 43)
	ROUND3(BX, CX, DX, BP, AX, 44)
	ROUND3(AX, BX, CX, DX, BP, 45)
	ROUND3(BP, AX, BX, CX, DX, 46)
	ROUND3(DX, BP, AX, BX, CX, 47)
	ROUND3(CX, DX, BP, AX, BX, 48)
	ROUND3(BX, CX, DX, BP, AX, 49)
	ROUND3(AX, BX, CX, DX, BP, 50)
	ROUND3(BP, AX, BX, CX, DX, 51)
	ROUND3(DX, BP, AX, BX, CX, 52)
	ROUND3(CX, DX, BP, AX, BX, 53)
	ROUND3(BX, CX, DX, BP, AX, 54)
	ROUND3(AX, BX, CX, DX, BP, 55)
	ROUND3(BP, AX, BX, CX, DX, 56)
	ROUND3(DX, BP, AX, BX, CX, 57)
	ROUND3(CX, DX, BP, AX, BX, 58)
	ROUND3(BX, CX, DX, BP, AX, 59)

	ROUND4(AX, BX, CX, DX, BP, 60)
	ROUND4(BP, AX, BX, CX, DX, 61)
	ROUND4(DX, BP, AX, BX, CX, 62)
	ROUND4(CX, DX, BP, AX, BX, 63)
	ROUND4(BX, CX, DX, BP, AX, 64)
	ROUND4(AX, BX, CX, DX, BP, 65)
	ROUND4(BP, AX, BX, CX, DX, 66)
	ROUND4(DX, BP, AX, BX, CX, 67)
	ROUND4(CX, DX, BP, AX, BX, 68)
	ROUND4(BX, CX, DX, BP, AX, 69)
	ROUND4(AX, BX, CX, DX, BP, 70)
	ROUND4(BP, AX, BX, CX, DX, 71)
	ROUND4(DX, BP, AX, BX, CX, 72)
	ROUND4(CX, DX, BP, AX, BX, 73)
	ROUND4(BX, CX, DX, BP, AX, 74)
	ROUND4(AX, BX, CX, DX, BP, 75)
	ROUND4(BP, AX, BX, CX, DX, 76)
	ROUND4(DX, BP, AX, BX, CX, 77)
	ROUND4(CX, DX, BP, AX, BX, 78)
	ROUND4(BX, CX, DX, BP, AX, 79)
#undef BP

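// Feed-forward: add this block's working variables back into the hash
// state H0..H4 in *dig and store the updated state.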
	ADDL	(0*4)(R14), AX
	ADDL	(1*4)(R14), BX
	ADDL	(2*4)(R14), CX
	ADDL	(3*4)(R14), DX
	ADDL	(4*4)(R14), R13

	MOVL	AX, (0*4)(R14)
	MOVL	BX, (1*4)(R14)
	MOVL	CX, (2*4)(R14)
	MOVL	DX, (3*4)(R14)
	MOVL	R13, (4*4)(R14)

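// Advance to the next 64-byte block and loop until SI reaches the end
// pointer in DI.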
	ADDQ	$64, SI
	CMPQ	SI, DI
	JB	loop

end:
	RET