github.com/d4l3k/go@v0.0.0-20151015000803-65fc379daeda/src/crypto/sha1/sha1block_386.s (about)

     1  // Copyright 2013 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // SHA1 block routine. See sha1block.go for Go equivalent.
     8  //
     9  // There are 80 rounds of 4 types:
    10  //   - rounds 0-15 are type 1 and load data (ROUND1 macro).
    11  //   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
    12  //   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
    13  //   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
    14  //   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
    15  //
    16  // Each round loads or shuffles the data, then computes a per-round
    17  // function of b, c, d, and then mixes the result into and rotates the
    18  // five registers a, b, c, d, e holding the intermediate results.
    19  //
    20  // The register rotation is implemented by rotating the arguments to
    21  // the round macros instead of by explicit move instructions.
    22  
    23  // Like sha1block_amd64.s, but we keep the data and limit pointers on the stack.
    24  // To free up the word pointer (R10 on amd64, DI here), we add it to e during
    25  // LOAD/SHUFFLE instead of during MIX.
    26  //
    27  // The stack holds the intermediate word array - 16 uint32s - at 0(SP) up to 64(SP).
    28  // The saved a, b, c, d, e (R11 through R15 on amd64) are at 64(SP) up to 84(SP).
    29  // The saved limit pointer (DI on amd64) is at 84(SP).
    30  // The saved data pointer (SI on amd64) is at 88(SP).
    31  
    32  #define LOAD(index, e) \
    33  	MOVL	88(SP), SI; \
    34  	MOVL	(index*4)(SI), DI; \
    35  	BSWAPL	DI; \
    36  	MOVL	DI, (index*4)(SP); \
    37  	ADDL	DI, e
    38  
    39  #define SHUFFLE(index, e) \
    40  	MOVL	(((index)&0xf)*4)(SP), DI; \
    41  	XORL	(((index-3)&0xf)*4)(SP), DI; \
    42  	XORL	(((index-8)&0xf)*4)(SP), DI; \
    43  	XORL	(((index-14)&0xf)*4)(SP), DI; \
    44  	ROLL	$1, DI; \
    45  	MOVL	DI, (((index)&0xf)*4)(SP); \
    46  	ADDL	DI, e
    47  
    48  #define FUNC1(a, b, c, d, e) \
    49  	MOVL	d, DI; \
    50  	XORL	c, DI; \
    51  	ANDL	b, DI; \
    52  	XORL	d, DI
    53  
    54  #define FUNC2(a, b, c, d, e) \
    55  	MOVL	b, DI; \
    56  	XORL	c, DI; \
    57  	XORL	d, DI
    58  
    59  #define FUNC3(a, b, c, d, e) \
    60  	MOVL	b, SI; \
    61  	ORL	c, SI; \
    62  	ANDL	d, SI; \
    63  	MOVL	b, DI; \
    64  	ANDL	c, DI; \
    65  	ORL	SI, DI
    66  
    67  #define FUNC4 FUNC2
    68  
    69  #define MIX(a, b, c, d, e, const) \
    70  	ROLL	$30, b; \
    71  	ADDL	DI, e; \
    72  	MOVL	a, SI; \
    73  	ROLL	$5, SI; \
    74  	LEAL	const(e)(SI*1), e
    75  
    76  #define ROUND1(a, b, c, d, e, index) \
    77  	LOAD(index, e); \
    78  	FUNC1(a, b, c, d, e); \
    79  	MIX(a, b, c, d, e, 0x5A827999)
    80  
    81  #define ROUND1x(a, b, c, d, e, index) \
    82  	SHUFFLE(index, e); \
    83  	FUNC1(a, b, c, d, e); \
    84  	MIX(a, b, c, d, e, 0x5A827999)
    85  
    86  #define ROUND2(a, b, c, d, e, index) \
    87  	SHUFFLE(index, e); \
    88  	FUNC2(a, b, c, d, e); \
    89  	MIX(a, b, c, d, e, 0x6ED9EBA1)
    90  
    91  #define ROUND3(a, b, c, d, e, index) \
    92  	SHUFFLE(index, e); \
    93  	FUNC3(a, b, c, d, e); \
    94  	MIX(a, b, c, d, e, 0x8F1BBCDC)
    95  
    96  #define ROUND4(a, b, c, d, e, index) \
    97  	SHUFFLE(index, e); \
    98  	FUNC4(a, b, c, d, e); \
    99  	MIX(a, b, c, d, e, 0xCA62C1D6)
   100  
   101  // func block(dig *digest, p []byte)
   102  TEXT ·block(SB),NOSPLIT,$92-16
   103  	MOVL	dig+0(FP),	BP
   104  	MOVL	p+4(FP),	SI
   105  	MOVL	p_len+8(FP),	DX
   106  	SHRL	$6,		DX
   107  	SHLL	$6,		DX
   108  	
   109  	LEAL	(SI)(DX*1),	DI
   110  	MOVL	(0*4)(BP),	AX
   111  	MOVL	(1*4)(BP),	BX
   112  	MOVL	(2*4)(BP),	CX
   113  	MOVL	(3*4)(BP),	DX
   114  	MOVL	(4*4)(BP),	BP
   115  
   116  	CMPL	SI,		DI
   117  	JEQ	end
   118  
   119  	MOVL	DI,	84(SP)
   120  
   121  loop:
   122  	MOVL	SI,	88(SP)
   123  
   124  	MOVL	AX,	64(SP)
   125  	MOVL	BX,	68(SP)
   126  	MOVL	CX,	72(SP)
   127  	MOVL	DX,	76(SP)
   128  	MOVL	BP,	80(SP)
   129  
   130  	ROUND1(AX, BX, CX, DX, BP, 0)
   131  	ROUND1(BP, AX, BX, CX, DX, 1)
   132  	ROUND1(DX, BP, AX, BX, CX, 2)
   133  	ROUND1(CX, DX, BP, AX, BX, 3)
   134  	ROUND1(BX, CX, DX, BP, AX, 4)
   135  	ROUND1(AX, BX, CX, DX, BP, 5)
   136  	ROUND1(BP, AX, BX, CX, DX, 6)
   137  	ROUND1(DX, BP, AX, BX, CX, 7)
   138  	ROUND1(CX, DX, BP, AX, BX, 8)
   139  	ROUND1(BX, CX, DX, BP, AX, 9)
   140  	ROUND1(AX, BX, CX, DX, BP, 10)
   141  	ROUND1(BP, AX, BX, CX, DX, 11)
   142  	ROUND1(DX, BP, AX, BX, CX, 12)
   143  	ROUND1(CX, DX, BP, AX, BX, 13)
   144  	ROUND1(BX, CX, DX, BP, AX, 14)
   145  	ROUND1(AX, BX, CX, DX, BP, 15)
   146  
   147  	ROUND1x(BP, AX, BX, CX, DX, 16)
   148  	ROUND1x(DX, BP, AX, BX, CX, 17)
   149  	ROUND1x(CX, DX, BP, AX, BX, 18)
   150  	ROUND1x(BX, CX, DX, BP, AX, 19)
   151  	
   152  	ROUND2(AX, BX, CX, DX, BP, 20)
   153  	ROUND2(BP, AX, BX, CX, DX, 21)
   154  	ROUND2(DX, BP, AX, BX, CX, 22)
   155  	ROUND2(CX, DX, BP, AX, BX, 23)
   156  	ROUND2(BX, CX, DX, BP, AX, 24)
   157  	ROUND2(AX, BX, CX, DX, BP, 25)
   158  	ROUND2(BP, AX, BX, CX, DX, 26)
   159  	ROUND2(DX, BP, AX, BX, CX, 27)
   160  	ROUND2(CX, DX, BP, AX, BX, 28)
   161  	ROUND2(BX, CX, DX, BP, AX, 29)
   162  	ROUND2(AX, BX, CX, DX, BP, 30)
   163  	ROUND2(BP, AX, BX, CX, DX, 31)
   164  	ROUND2(DX, BP, AX, BX, CX, 32)
   165  	ROUND2(CX, DX, BP, AX, BX, 33)
   166  	ROUND2(BX, CX, DX, BP, AX, 34)
   167  	ROUND2(AX, BX, CX, DX, BP, 35)
   168  	ROUND2(BP, AX, BX, CX, DX, 36)
   169  	ROUND2(DX, BP, AX, BX, CX, 37)
   170  	ROUND2(CX, DX, BP, AX, BX, 38)
   171  	ROUND2(BX, CX, DX, BP, AX, 39)
   172  	
   173  	ROUND3(AX, BX, CX, DX, BP, 40)
   174  	ROUND3(BP, AX, BX, CX, DX, 41)
   175  	ROUND3(DX, BP, AX, BX, CX, 42)
   176  	ROUND3(CX, DX, BP, AX, BX, 43)
   177  	ROUND3(BX, CX, DX, BP, AX, 44)
   178  	ROUND3(AX, BX, CX, DX, BP, 45)
   179  	ROUND3(BP, AX, BX, CX, DX, 46)
   180  	ROUND3(DX, BP, AX, BX, CX, 47)
   181  	ROUND3(CX, DX, BP, AX, BX, 48)
   182  	ROUND3(BX, CX, DX, BP, AX, 49)
   183  	ROUND3(AX, BX, CX, DX, BP, 50)
   184  	ROUND3(BP, AX, BX, CX, DX, 51)
   185  	ROUND3(DX, BP, AX, BX, CX, 52)
   186  	ROUND3(CX, DX, BP, AX, BX, 53)
   187  	ROUND3(BX, CX, DX, BP, AX, 54)
   188  	ROUND3(AX, BX, CX, DX, BP, 55)
   189  	ROUND3(BP, AX, BX, CX, DX, 56)
   190  	ROUND3(DX, BP, AX, BX, CX, 57)
   191  	ROUND3(CX, DX, BP, AX, BX, 58)
   192  	ROUND3(BX, CX, DX, BP, AX, 59)
   193  	
   194  	ROUND4(AX, BX, CX, DX, BP, 60)
   195  	ROUND4(BP, AX, BX, CX, DX, 61)
   196  	ROUND4(DX, BP, AX, BX, CX, 62)
   197  	ROUND4(CX, DX, BP, AX, BX, 63)
   198  	ROUND4(BX, CX, DX, BP, AX, 64)
   199  	ROUND4(AX, BX, CX, DX, BP, 65)
   200  	ROUND4(BP, AX, BX, CX, DX, 66)
   201  	ROUND4(DX, BP, AX, BX, CX, 67)
   202  	ROUND4(CX, DX, BP, AX, BX, 68)
   203  	ROUND4(BX, CX, DX, BP, AX, 69)
   204  	ROUND4(AX, BX, CX, DX, BP, 70)
   205  	ROUND4(BP, AX, BX, CX, DX, 71)
   206  	ROUND4(DX, BP, AX, BX, CX, 72)
   207  	ROUND4(CX, DX, BP, AX, BX, 73)
   208  	ROUND4(BX, CX, DX, BP, AX, 74)
   209  	ROUND4(AX, BX, CX, DX, BP, 75)
   210  	ROUND4(BP, AX, BX, CX, DX, 76)
   211  	ROUND4(DX, BP, AX, BX, CX, 77)
   212  	ROUND4(CX, DX, BP, AX, BX, 78)
   213  	ROUND4(BX, CX, DX, BP, AX, 79)
   214  
   215  	ADDL	64(SP), AX
   216  	ADDL	68(SP), BX
   217  	ADDL	72(SP), CX
   218  	ADDL	76(SP), DX
   219  	ADDL	80(SP), BP
   220  
   221  	MOVL	88(SP), SI
   222  	ADDL	$64, SI
   223  	CMPL	SI, 84(SP)
   224  	JB	loop
   225  
   226  end:
   227  	MOVL	dig+0(FP), DI
   228  	MOVL	AX, (0*4)(DI)
   229  	MOVL	BX, (1*4)(DI)
   230  	MOVL	CX, (2*4)(DI)
   231  	MOVL	DX, (3*4)(DI)
   232  	MOVL	BP, (4*4)(DI)
   233  	RET