github.com/akaros/go-akaros@v0.0.0-20181004170632-85005d477eab/src/crypto/sha1/sha1block_arm.s (about)

     1  // Copyright 2014 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  //
     5  // ARM version of sha1block.go
     6  
     7  #include "textflag.h"
     8  
     9  // SHA1 block routine. See sha1block.go for Go equivalent.
    10  //
    11  // There are 80 rounds of 4 types:
    12  //   - rounds 0-15 are type 1 and load data (ROUND1 macro).
    13  //   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
    14  //   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
    15  //   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
    16  //   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
    17  //
    18  // Each round loads or shuffles the data, then computes a per-round
    19  // function of b, c, d, and then mixes the result into and rotates the
    20  // five registers a, b, c, d, e holding the intermediate results.
    21  //
    22  // The register rotation is implemented by rotating the arguments to
    23  // the round macros instead of by explicit move instructions.
    24  
    25  // Register definitions: each symbol is a register number, used below as R(name).
    26  data = 0	// Pointer to incoming data; LOAD advances it by 4 bytes per word
    27  const = 1	// Current constant for SHA round
    28  a = 2		// SHA1 accumulator a
    29  b = 3		// SHA1 accumulator b
    30  c = 4		// SHA1 accumulator c
    31  d = 5		// SHA1 accumulator d
    32  e = 6		// SHA1 accumulator e
    33  t0 = 7		// Temporary
    34  t1 = 8		// Temporary; carries the FUNCn result into MIX
    35  // r9, r10 are forbidden
    36  // r11 is OK provided you check the assembler that no synthetic instructions use it
    37  t2 = 11		// Temporary
    38  ctr = 12	// Loop counter; also receives saved d when the saved state is reloaded
    39  w = 14		// Points to the next w slot to write; also receives saved e on reload
    40  
    41  // func block(dig *digest, p []byte)
    42  // 0(FP) is *digest
    43  // 4(FP) is p.array (struct Slice)
    44  // 8(FP) is p.len
    45  //12(FP) is p.cap
    46  //
    47  // Stack frame (offsets are relative to the SP pseudo-register)
    48  p_end = -4		// -4(SP) pointer to the end of data
    49  p_data = p_end - 4	// -8(SP) current data pointer (slot only used to derive w_buf; the pointer lives in R(data))
    50  w_buf = p_data - 4*80	// -328(SP) 80 words temporary buffer w uint32[80]
    51  saved = w_buf - 4*5	// -348(SP) saved sha1 registers a,b,c,d,e - these must be last (never referenced by name; block uses MOVM.IB on R13)
    52  // Total size +4 for saved LR is 352
    53  
    54  	// w[i] = p[j]<<24 | p[j+1]<<16 | p[j+2]<<8 | p[j+3]   (big-endian word, assembled from four byte loads)
    55  	// e += w[i]; R(data) ends up 4 bytes further and R(w) one word further. Clobbers t0, t1, t2.
    56  #define LOAD(e) \
    57  	MOVBU	2(R(data)), R(t0) ; \
    58  	MOVBU	3(R(data)), R(t1) ; \
    59  	MOVBU	1(R(data)), R(t2) ; \
    60  	ORR	R(t0)<<8, R(t1), R(t0)	    ; \
    61  	MOVBU.P	4(R(data)), R(t1) ; \
    62  	ORR	R(t2)<<16, R(t0), R(t0)	    ; \
    63  	ORR	R(t1)<<24, R(t0), R(t0)	    ; \
    64  	MOVW.P	R(t0), 4(R(w))		    ; \
    65  	ADD	R(t0), R(e), R(e)
    66  	
    67  	// tmp := w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]   (R(w) points at w[i]; all 80 words are kept, so there is no &0xf wrap)
    68  	// w[i] = tmp<<1 | tmp>>(32-1)                  (written as a rotate right by 32-1)
    69  	// e += w[i]; R(w) ends up one word further. Clobbers t0, t1, t2.
    70  #define SHUFFLE(e) \
    71  	MOVW	(-16*4)(R(w)), R(t0) ; \
    72  	MOVW	(-14*4)(R(w)), R(t1) ; \
    73  	MOVW	(-8*4)(R(w)), R(t2)  ; \
    74  	EOR	R(t0), R(t1), R(t0)  ; \
    75  	MOVW	(-3*4)(R(w)), R(t1)  ; \
    76  	EOR	R(t2), R(t0), R(t0)  ; \
    77  	EOR	R(t0), R(t1), R(t0)  ; \
    78  	MOVW	R(t0)@>(32-1), R(t0)  ; \
    79  	MOVW.P	R(t0), 4(R(w))	  ; \
    80  	ADD	R(t0), R(e), R(e)
    81  
    82  	// t1 = (b & c) | ((~b) & d)   (each bit of b selects the bit of c or of d; clobbers t0, ignores a and e)
    83  #define FUNC1(a, b, c, d, e) \
    84  	MVN	R(b), R(t1)	   ; \
    85  	AND	R(b), R(c), R(t0)  ; \
    86  	AND	R(d), R(t1), R(t1) ; \
    87  	ORR	R(t0), R(t1), R(t1)
    88  
    89  	// t1 = b ^ c ^ d   (bitwise parity; touches only t1, ignores a and e)
    90  #define FUNC2(a, b, c, d, e) \
    91  	EOR	R(b), R(c), R(t1) ; \
    92  	EOR	R(d), R(t1), R(t1)
    93  
    94  	// t1 = (b & c) | (b & d) | (c & d) =          (bitwise majority of b, c, d)
    95  	// t1 = (b & c) | ((b | c) & d)                (form actually computed; clobbers t0, ignores a and e)
    96  #define FUNC3(a, b, c, d, e) \
    97  	ORR	R(b), R(c), R(t0)  ; \
    98  	AND	R(b), R(c), R(t1)  ; \
    99  	AND	R(d), R(t0), R(t0) ; \
   100  	ORR	R(t0), R(t1), R(t1)
   101  	// t1 = b ^ c ^ d   (ROUND4 uses the same function as FUNC2)
   102  #define FUNC4 FUNC2
   103  
   104  	// a5 := a<<5 | a>>(32-5)          (folded into the ADD as a rotate right by 32-5; a itself is unchanged)
   105  	// b = b<<30 | b>>(32-30)          (rotate right by 32-30, done in place)
   106  	// e = a5 + t1 + e + const         (t1 is the FUNCn result; c and d are not used)
   107  #define MIX(a, b, c, d, e) \
   108  	ADD	R(t1), R(e), R(e)	 ; \
   109  	MOVW	R(b)@>(32-30), R(b)	 ; \
   110  	ADD	R(a)@>(32-5), R(e), R(e) ; \
   111  	ADD	R(const), R(e), R(e)
   112  	// Rounds 0-15: LOAD the next input word into w and add it to e, then FUNC1 and MIX.
   113  #define ROUND1(a, b, c, d, e) \
   114  	LOAD(e)		; \
   115  	FUNC1(a, b, c, d, e)	; \
   116  	MIX(a, b, c, d, e)
   117  	// Rounds 16-19: same as ROUND1, but the word comes from SHUFFLE instead of the input.
   118  #define ROUND1x(a, b, c, d, e) \
   119  	SHUFFLE(e)	; \
   120  	FUNC1(a, b, c, d, e)	; \
   121  	MIX(a, b, c, d, e)
   122  	// Rounds 20-39: SHUFFLE the next word, then FUNC2 (b ^ c ^ d) and MIX.
   123  #define ROUND2(a, b, c, d, e) \
   124  	SHUFFLE(e)	; \
   125  	FUNC2(a, b, c, d, e)	; \
   126  	MIX(a, b, c, d, e)
   127  	// Rounds 40-59: SHUFFLE the next word, then FUNC3 (majority of b, c, d) and MIX.
   128  #define ROUND3(a, b, c, d, e) \
   129  	SHUFFLE(e)	; \
   130  	FUNC3(a, b, c, d, e)	; \
   131  	MIX(a, b, c, d, e)
   132  	// Rounds 60-79: SHUFFLE the next word, then FUNC4 (defined as FUNC2) and MIX.
   133  #define ROUND4(a, b, c, d, e) \
   134  	SHUFFLE(e)	; \
   135  	FUNC4(a, b, c, d, e)	; \
   136  	MIX(a, b, c, d, e)
   137  
   138  
   139  // func block(dig *digest, p []byte)
   140  //
   141  // Hashes every complete 64-byte block of p into the five accumulator words
   142  // a, b, c, d, e stored at the start of *dig. A trailing partial block is
   143  // ignored, and if p holds no complete block *dig is left untouched.
   144  TEXT	·block(SB), 0, $352-16
   145  	MOVW	p+4(FP), R(data)	// pointer to the data
   146  	MOVW	p_len+8(FP), R(t0)	// number of bytes
   147  	BIC	$63, R(t0)		// round down to a whole number of 64-byte blocks
   148  	ADD	R(data), R(t0)
   149  	CMP	R(t0), R(data)
   150  	BEQ	end			// no complete block: the loop below tests only at the bottom, so skip it
   151  	MOVW	R(t0), p_end(SP)	// pointer to end of data
   152  
   153  	// Load up initial SHA1 accumulator
   154  	MOVW	dig+0(FP), R(t0)
   155  	MOVM.IA (R(t0)), [R(a),R(b),R(c),R(d),R(e)]
   156  
   157  loop:
   158  	// Save registers at SP+4 onwards
   159  	MOVM.IB [R(a),R(b),R(c),R(d),R(e)], (R13)
   160  
   161  	// Rounds 0-19: rounds 0-15 (3 passes of 5, plus 1) load input, rounds 16-19 extend w
   162  	MOVW	$w_buf(SP), R(w)
   163  	MOVW	$0x5A827999, R(const)
   164  	MOVW	$3, R(ctr)
   165  loop1:	ROUND1(a, b, c, d, e)
   166  	ROUND1(e, a, b, c, d)
   167  	ROUND1(d, e, a, b, c)
   168  	ROUND1(c, d, e, a, b)
   169  	ROUND1(b, c, d, e, a)
   170  	SUB.S	$1, R(ctr)
   171  	BNE	loop1
   172  
   173  	ROUND1(a, b, c, d, e)
   174  	ROUND1x(e, a, b, c, d)
   175  	ROUND1x(d, e, a, b, c)
   176  	ROUND1x(c, d, e, a, b)
   177  	ROUND1x(b, c, d, e, a)
   178  	
   179  	// Rounds 20-39 (4 passes of 5)
   180  	MOVW	$0x6ED9EBA1, R(const)
   181  	MOVW	$4, R(ctr)
   182  loop2:	ROUND2(a, b, c, d, e)
   183  	ROUND2(e, a, b, c, d)
   184  	ROUND2(d, e, a, b, c)
   185  	ROUND2(c, d, e, a, b)
   186  	ROUND2(b, c, d, e, a)
   187  	SUB.S	$1, R(ctr)
   188  	BNE	loop2
   189  	
   190  	// Rounds 40-59 (4 passes of 5)
   191  	MOVW	$0x8F1BBCDC, R(const)
   192  	MOVW	$4, R(ctr)
   193  loop3:	ROUND3(a, b, c, d, e)
   194  	ROUND3(e, a, b, c, d)
   195  	ROUND3(d, e, a, b, c)
   196  	ROUND3(c, d, e, a, b)
   197  	ROUND3(b, c, d, e, a)
   198  	SUB.S	$1, R(ctr)
   199  	BNE	loop3
   200  	
   201  	// Rounds 60-79 (4 passes of 5)
   202  	MOVW	$0xCA62C1D6, R(const)
   203  	MOVW	$4, R(ctr)
   204  loop4:	ROUND4(a, b, c, d, e)
   205  	ROUND4(e, a, b, c, d)
   206  	ROUND4(d, e, a, b, c)
   207  	ROUND4(c, d, e, a, b)
   208  	ROUND4(b, c, d, e, a)
   209  	SUB.S	$1, R(ctr)
   210  	BNE	loop4
   211  
   212  	// Accumulate - restoring registers from SP+4
   213  	// (the saved a,b,c,d,e come back in t0,t1,t2,ctr,w, which are no longer needed for this block)
   214  	MOVM.IB (R13), [R(t0),R(t1),R(t2),R(ctr),R(w)]
   215  	ADD	R(t0), R(a)
   216  	ADD	R(t1), R(b)
   217  	ADD	R(t2), R(c)
   218  	ADD	R(ctr), R(d)
   219  	ADD	R(w), R(e)
   220  
   221  	// Process the next block while the data pointer is still below p_end
   222  	MOVW	p_end(SP), R(t0)
   223  	CMP	R(t0), R(data)
   224  	BLO	loop
   225  
   226  	// Save final SHA1 accumulator
   227  	MOVW	dig+0(FP), R(t0)
   228  	MOVM.IA [R(a),R(b),R(c),R(d),R(e)], (R(t0))
   229  
   230  end:
   231  	RET