github.com/d4l3k/go@v0.0.0-20151015000803-65fc379daeda/src/crypto/sha1/sha1block_amd64.s (about)

     1  // Copyright 2013 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // SHA1 block routine. See sha1block.go for Go equivalent.
     8  //
     9  // There are 80 rounds of 4 types:
    10  //   - rounds 0-15 are type 1 and load data (ROUND1 macro).
    11  //   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
    12  //   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
    13  //   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
    14  //   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
    15  //
    16  // Each round loads or shuffles the data, then computes a per-round
    17  // function of b, c, d, and then mixes the result into and rotates the
    18  // five registers a, b, c, d, e holding the intermediate results.
    19  //
    20  // The register rotation is implemented by rotating the arguments to
    21  // the round macros instead of by explicit move instructions.
    22  
    // LOAD(index) fetches the 32-bit word at byte offset index*4 from the
    // input pointer SI, reverses its byte order with BSWAPL, and stores it in
    // slot index of the buffer at SP.
    // The swapped word is also left in R10, where MIX picks it up.
    // Clobbers: R10.
    23  #define LOAD(index) \
    24  	MOVL	(index*4)(SI), R10; \
    25  	BSWAPL	R10; \
    26  	MOVL	R10, (index*4)(SP)
    27  
    // SHUFFLE(index) derives the word for round `index` from earlier words.
    // The buffer at SP is treated as a 16-entry ring (every slot number is
    // taken & 0xf):
    //   w[i] = rol1(w[i] ^ w[i-3] ^ w[i-8] ^ w[i-14])
    // The new word overwrites slot i&0xf and is left in R10 for MIX.
    // Clobbers: R10, flags.
    28  #define SHUFFLE(index) \
    29  	MOVL	(((index)&0xf)*4)(SP), R10; \
    30  	XORL	(((index-3)&0xf)*4)(SP), R10; \
    31  	XORL	(((index-8)&0xf)*4)(SP), R10; \
    32  	XORL	(((index-14)&0xf)*4)(SP), R10; \
    33  	ROLL	$1, R10; \
    34  	MOVL	R10, (((index)&0xf)*4)(SP)
    35  
    // FUNC1 leaves ((c ^ d) & b) ^ d in R9.
    // Bit for bit this selects c where b is 1 and d where b is 0, i.e. it
    // equals (b & c) | (^b & d) while needing only one scratch register.
    // a and e are accepted for a uniform argument list but are not used.
    // Clobbers: R9, flags.
    36  #define FUNC1(a, b, c, d, e) \
    37  	MOVL	d, R9; \
    38  	XORL	c, R9; \
    39  	ANDL	b, R9; \
    40  	XORL	d, R9
    41  
    // FUNC2 leaves b ^ c ^ d in R9.
    // a and e are accepted for a uniform argument list but are not used.
    // Clobbers: R9, flags.
    42  #define FUNC2(a, b, c, d, e) \
    43  	MOVL	b, R9; \
    44  	XORL	c, R9; \
    45  	XORL	d, R9
    46  
    // FUNC3 leaves ((b | c) & d) | (b & c) in R9, the bitwise majority of
    // b, c and d. R8 holds the (b | c) & d half while R9 builds b & c.
    // a and e are accepted for a uniform argument list but are not used.
    // Clobbers: R8, R9, flags.
    47  #define FUNC3(a, b, c, d, e) \
    48  	MOVL	b, R8; \
    49  	ORL	c, R8; \
    50  	ANDL	d, R8; \
    51  	MOVL	b, R9; \
    52  	ANDL	c, R9; \
    53  	ORL	R8, R9
    54  	
    // FUNC4 is an alias for FUNC2: type-4 rounds use the same b ^ c ^ d
    // function and differ only in the constant passed to MIX by ROUND4.
    55  #define FUNC4 FUNC2
    56  
    // MIX folds one round's result into the state. On entry R9 must hold the
    // FUNCn output and R10 the round's word (from LOAD or SHUFFLE).
    // Net effect, in 32-bit wraparound arithmetic:
    //   b = rol(b, 30)
    //   e = e + R9 + R10 + const + rol(a, 5)
    // a is only read (via the R8 copy); c and d are not touched.
    // LEAL performs the three-way add e + R10 + const in one instruction.
    // Clobbers: R8, flags.
    57  #define MIX(a, b, c, d, e, const) \
    58  	ROLL	$30, b; \
    59  	ADDL	R9, e; \
    60  	MOVL	a, R8; \
    61  	ROLL	$5, R8; \
    62  	LEAL	const(e)(R10*1), e; \
    63  	ADDL	R8, e
    64  
    // ROUND1 is a type-1 round that reads its word from the input:
    // LOAD(index), then FUNC1, then MIX with constant 0x5A827999.
    // Clobbers: R8, R9, R10, flags; writes buffer slot `index` at SP.
    65  #define ROUND1(a, b, c, d, e, index) \
    66  	LOAD(index); \
    67  	FUNC1(a, b, c, d, e); \
    68  	MIX(a, b, c, d, e, 0x5A827999)
    69  
    // ROUND1x is a type-1 round whose word comes from the buffer rather than
    // the input: SHUFFLE(index), then FUNC1, then MIX with the same constant
    // as ROUND1 (0x5A827999).
    // Clobbers: R8, R9, R10, flags; rewrites buffer slot index&0xf at SP.
    70  #define ROUND1x(a, b, c, d, e, index) \
    71  	SHUFFLE(index); \
    72  	FUNC1(a, b, c, d, e); \
    73  	MIX(a, b, c, d, e, 0x5A827999)
    74  
    // ROUND2 is a type-2 round: SHUFFLE(index), then FUNC2 (b ^ c ^ d), then
    // MIX with constant 0x6ED9EBA1.
    // Clobbers: R8, R9, R10, flags; rewrites buffer slot index&0xf at SP.
    75  #define ROUND2(a, b, c, d, e, index) \
    76  	SHUFFLE(index); \
    77  	FUNC2(a, b, c, d, e); \
    78  	MIX(a, b, c, d, e, 0x6ED9EBA1)
    79  
    // ROUND3 is a type-3 round: SHUFFLE(index), then FUNC3 (majority of
    // b, c, d), then MIX with constant 0x8F1BBCDC.
    // Clobbers: R8, R9, R10, flags; rewrites buffer slot index&0xf at SP.
    80  #define ROUND3(a, b, c, d, e, index) \
    81  	SHUFFLE(index); \
    82  	FUNC3(a, b, c, d, e); \
    83  	MIX(a, b, c, d, e, 0x8F1BBCDC)
    84  
    // ROUND4 is a type-4 round: SHUFFLE(index), then FUNC4 (which expands to
    // FUNC2, b ^ c ^ d), then MIX with constant 0xCA62C1D6.
    // Clobbers: R8, R9, R10, flags; rewrites buffer slot index&0xf at SP.
    85  #define ROUND4(a, b, c, d, e, index) \
    86  	SHUFFLE(index); \
    87  	FUNC4(a, b, c, d, e); \
    88  	MIX(a, b, c, d, e, 0xCA62C1D6)
    89  
    // block runs the 80-round compression over every complete 64-byte chunk
    // of the input and updates the five 32-bit state words addressed by dig.
    // Trailing bytes beyond the last multiple of 64 are ignored.
    //
    // Arguments, as referenced through FP below:
    //   dig+0(FP)     pointer to the five state words
    //   p_base+8(FP)  start of the input
    //   p_len+16(FP)  input length in bytes
    // NOTE(review): presumably declared on the Go side as
    // func block(dig *digest, p []byte) -- confirm against the Go declaration.
    //
    // The 64-byte frame is the 16-word buffer at SP that LOAD fills and
    // SHUFFLE updates in place.
    //
    // Register use:
    //   SI              current input position
    //   DI              end of the last complete chunk (reused for dig at end)
    //   AX BX CX DX BP  working state a, b, c, d, e
    //   R11-R15         copy of the state taken at the start of each chunk
    //   R8 R9 R10       scratch, clobbered by the round macros
    90  TEXT ·block(SB),NOSPLIT,$64-32
    91  	MOVQ	dig+0(FP),	BP	// BP = dig (temporarily; becomes state word e below)
    92  	MOVQ	p_base+8(FP),	SI	// SI = start of input
    93  	MOVQ	p_len+16(FP),	DX	// DX = input length in bytes
    94  	SHRQ	$6,		DX	// shift right then left by 6:
    95  	SHLQ	$6,		DX	// round the length down to a multiple of 64
    96  	
    97  	LEAQ	(SI)(DX*1),	DI	// DI = end of the last complete chunk
    98  	MOVL	(0*4)(BP),	AX	// a = dig word 0
    99  	MOVL	(1*4)(BP),	BX	// b = dig word 1
   100  	MOVL	(2*4)(BP),	CX	// c = dig word 2
   101  	MOVL	(3*4)(BP),	DX	// d = dig word 3
   102  	MOVL	(4*4)(BP),	BP	// e = dig word 4; loaded last because it overwrites the pointer
   103  	// No complete chunk: skip the loop and just write the state back.
   104  	CMPQ	SI,		DI
   105  	JEQ	end
   106  
   107  loop:
   108  	MOVL	AX,	R11	// remember the incoming state; it is added back
   109  	MOVL	BX,	R12	// to the round output once the 80 rounds are done
   110  	MOVL	CX,	R13
   111  	MOVL	DX,	R14
   112  	MOVL	BP,	R15
   113  	// Rounds 0-15: words come from the input. Each call passes the registers rotated one place, (a,b,c,d,e) -> (e,a,b,c,d), instead of moving values.
   114  	ROUND1(AX, BX, CX, DX, BP, 0)
   115  	ROUND1(BP, AX, BX, CX, DX, 1)
   116  	ROUND1(DX, BP, AX, BX, CX, 2)
   117  	ROUND1(CX, DX, BP, AX, BX, 3)
   118  	ROUND1(BX, CX, DX, BP, AX, 4)
   119  	ROUND1(AX, BX, CX, DX, BP, 5)
   120  	ROUND1(BP, AX, BX, CX, DX, 6)
   121  	ROUND1(DX, BP, AX, BX, CX, 7)
   122  	ROUND1(CX, DX, BP, AX, BX, 8)
   123  	ROUND1(BX, CX, DX, BP, AX, 9)
   124  	ROUND1(AX, BX, CX, DX, BP, 10)
   125  	ROUND1(BP, AX, BX, CX, DX, 11)
   126  	ROUND1(DX, BP, AX, BX, CX, 12)
   127  	ROUND1(CX, DX, BP, AX, BX, 13)
   128  	ROUND1(BX, CX, DX, BP, AX, 14)
   129  	ROUND1(AX, BX, CX, DX, BP, 15)
   130  	// Rounds 16-19: still the type-1 function, but words now come from SHUFFLE.
   131  	ROUND1x(BP, AX, BX, CX, DX, 16)
   132  	ROUND1x(DX, BP, AX, BX, CX, 17)
   133  	ROUND1x(CX, DX, BP, AX, BX, 18)
   134  	ROUND1x(BX, CX, DX, BP, AX, 19)
   135  	// Rounds 20-39: type 2.
   136  	ROUND2(AX, BX, CX, DX, BP, 20)
   137  	ROUND2(BP, AX, BX, CX, DX, 21)
   138  	ROUND2(DX, BP, AX, BX, CX, 22)
   139  	ROUND2(CX, DX, BP, AX, BX, 23)
   140  	ROUND2(BX, CX, DX, BP, AX, 24)
   141  	ROUND2(AX, BX, CX, DX, BP, 25)
   142  	ROUND2(BP, AX, BX, CX, DX, 26)
   143  	ROUND2(DX, BP, AX, BX, CX, 27)
   144  	ROUND2(CX, DX, BP, AX, BX, 28)
   145  	ROUND2(BX, CX, DX, BP, AX, 29)
   146  	ROUND2(AX, BX, CX, DX, BP, 30)
   147  	ROUND2(BP, AX, BX, CX, DX, 31)
   148  	ROUND2(DX, BP, AX, BX, CX, 32)
   149  	ROUND2(CX, DX, BP, AX, BX, 33)
   150  	ROUND2(BX, CX, DX, BP, AX, 34)
   151  	ROUND2(AX, BX, CX, DX, BP, 35)
   152  	ROUND2(BP, AX, BX, CX, DX, 36)
   153  	ROUND2(DX, BP, AX, BX, CX, 37)
   154  	ROUND2(CX, DX, BP, AX, BX, 38)
   155  	ROUND2(BX, CX, DX, BP, AX, 39)
   156  	// Rounds 40-59: type 3.
   157  	ROUND3(AX, BX, CX, DX, BP, 40)
   158  	ROUND3(BP, AX, BX, CX, DX, 41)
   159  	ROUND3(DX, BP, AX, BX, CX, 42)
   160  	ROUND3(CX, DX, BP, AX, BX, 43)
   161  	ROUND3(BX, CX, DX, BP, AX, 44)
   162  	ROUND3(AX, BX, CX, DX, BP, 45)
   163  	ROUND3(BP, AX, BX, CX, DX, 46)
   164  	ROUND3(DX, BP, AX, BX, CX, 47)
   165  	ROUND3(CX, DX, BP, AX, BX, 48)
   166  	ROUND3(BX, CX, DX, BP, AX, 49)
   167  	ROUND3(AX, BX, CX, DX, BP, 50)
   168  	ROUND3(BP, AX, BX, CX, DX, 51)
   169  	ROUND3(DX, BP, AX, BX, CX, 52)
   170  	ROUND3(CX, DX, BP, AX, BX, 53)
   171  	ROUND3(BX, CX, DX, BP, AX, 54)
   172  	ROUND3(AX, BX, CX, DX, BP, 55)
   173  	ROUND3(BP, AX, BX, CX, DX, 56)
   174  	ROUND3(DX, BP, AX, BX, CX, 57)
   175  	ROUND3(CX, DX, BP, AX, BX, 58)
   176  	ROUND3(BX, CX, DX, BP, AX, 59)
   177  	// Rounds 60-79: type 4. After 80 rotations the arguments are back in (AX, BX, CX, DX, BP) order.
   178  	ROUND4(AX, BX, CX, DX, BP, 60)
   179  	ROUND4(BP, AX, BX, CX, DX, 61)
   180  	ROUND4(DX, BP, AX, BX, CX, 62)
   181  	ROUND4(CX, DX, BP, AX, BX, 63)
   182  	ROUND4(BX, CX, DX, BP, AX, 64)
   183  	ROUND4(AX, BX, CX, DX, BP, 65)
   184  	ROUND4(BP, AX, BX, CX, DX, 66)
   185  	ROUND4(DX, BP, AX, BX, CX, 67)
   186  	ROUND4(CX, DX, BP, AX, BX, 68)
   187  	ROUND4(BX, CX, DX, BP, AX, 69)
   188  	ROUND4(AX, BX, CX, DX, BP, 70)
   189  	ROUND4(BP, AX, BX, CX, DX, 71)
   190  	ROUND4(DX, BP, AX, BX, CX, 72)
   191  	ROUND4(CX, DX, BP, AX, BX, 73)
   192  	ROUND4(BX, CX, DX, BP, AX, 74)
   193  	ROUND4(AX, BX, CX, DX, BP, 75)
   194  	ROUND4(BP, AX, BX, CX, DX, 76)
   195  	ROUND4(DX, BP, AX, BX, CX, 77)
   196  	ROUND4(CX, DX, BP, AX, BX, 78)
   197  	ROUND4(BX, CX, DX, BP, AX, 79)
   198  	// Add the state saved at the top of the loop to the round output.
   199  	ADDL	R11, AX
   200  	ADDL	R12, BX
   201  	ADDL	R13, CX
   202  	ADDL	R14, DX
   203  	ADDL	R15, BP
   204  	// Step to the next 64-byte chunk; JB is the unsigned "SI below DI" test.
   205  	ADDQ	$64, SI
   206  	CMPQ	SI, DI
   207  	JB	loop
   208  
   209  end:
   210  	MOVQ	dig+0(FP), DI	// reload dig: BP now holds state word e, and DI is free
   211  	MOVL	AX, (0*4)(DI)	// write the five state words back to dig
   212  	MOVL	BX, (1*4)(DI)
   213  	MOVL	CX, (2*4)(DI)
   214  	MOVL	DX, (3*4)(DI)
   215  	MOVL	BP, (4*4)(DI)
   216  	RET