github.com/psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/crypto/internal/poly1305/sum_ppc64le.s (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build gc && !purego
     6  // +build gc,!purego
     7  
     8  #include "textflag.h"
     9  
    10  // This was ported from the amd64 implementation.
    11  
        // POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2)
        //
        // Adds one full 16-byte message block to the accumulator h2:h1:h0.
        //   - t0 and t1 are loaded from (msg) and 8(msg) and added to h0 and h1,
        //     with the carry propagated through the ADDC/ADDE chain.
        //   - t2 is set to 1 and added, together with the carry out of h1, to h2.
        //     As h0 and h1 are 64-bit limbs, this 1 has weight 2^128, i.e. it is
        //     the extra high bit appended to the 16-byte block.
        //   - msg is advanced by 16 bytes.
        // Clobbers t0, t1, t2 and the carry bit (CA). The instruction order matters:
        // each ADDE consumes the carry produced by the instruction before it.
    12  #define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
    13  	MOVD (msg), t0;  \
    14  	MOVD 8(msg), t1; \
    15  	MOVD $1, t2;     \
    16  	ADDC t0, h0, h0; \
    17  	ADDE t1, h1, h1; \
    18  	ADDE t2, h2;     \
    19  	ADD  $16, msg
    20  
        // POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5)
        //
        // Multiplies the accumulator h2:h1:h0 by r1:r0 and folds the result back
        // into h2:h1:h0.
        //
        // Outline of what the instruction sequence does:
        //   1. The product is accumulated in four 64-bit limbs t3:t2:t1:t0 using
        //      MULLD (low half) / MULHDU (high half) pairs for h0 and h1. h2 is
        //      multiplied with MULLD only, so just the low 64 bits of h2*r0 and
        //      h2*r1 are kept.
        //      NOTE(review): this presumably relies on h2 being small and on r
        //      having been clamped by the caller -- confirm.
        //      h0 is reused as a scratch register during this step (it holds the
        //      low half of h0*r1 and then a carry limb) before being overwritten.
        //   2. h0 = t0, h1 = t1, h2 = t2 & 3: the accumulator keeps the low 130
        //      bits of the product.
        //   3. Let c be the part of the product at or above bit 130. It is added
        //      back twice: first as (t3 : t2 & ~3), which is 4*c, and then as
        //      (t3 : t2) >> 2, which is c. The total, 5*c, is the fold for the
        //      modulus 2^130 - 5. Carries out of h1 go into h2 via ADDZE.
        //
        // No final conditional subtraction of the modulus is performed in this
        // macro; h2 may end up holding more than two bits.
        //
        // Clobbers t0-t5 and the carry bit (CA); ANDCC also updates the condition
        // register. The multiplies are interleaved with the carry chain, so the
        // statement order must not be changed.
    21  #define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
    22  	MULLD  r0, h0, t0;  \
    23  	MULLD  r0, h1, t4;  \
    24  	MULHDU r0, h0, t1;  \
    25  	MULHDU r0, h1, t5;  \
    26  	ADDC   t4, t1, t1;  \
    27  	MULLD  r0, h2, t2;  \
    28  	ADDZE  t5;          \
    29  	MULHDU r1, h0, t4;  \
    30  	MULLD  r1, h0, h0;  \
    31  	ADD    t5, t2, t2;  \
    32  	ADDC   h0, t1, t1;  \
    33  	MULLD  h2, r1, t3;  \
    34  	ADDZE  t4, h0;      \
    35  	MULHDU r1, h1, t5;  \
    36  	MULLD  r1, h1, t4;  \
    37  	ADDC   t4, t2, t2;  \
    38  	ADDE   t5, t3, t3;  \
    39  	ADDC   h0, t2, t2;  \
    40  	MOVD   $-4, t4;     \
    41  	MOVD   t0, h0;      \
    42  	MOVD   t1, h1;      \
    43  	ADDZE  t3;          \
    44  	ANDCC  $3, t2, h2;  \
    45  	AND    t2, t4, t0;  \
    46  	ADDC   t0, h0, h0;  \
    47  	ADDE   t3, h1, h1;  \
    48  	SLD    $62, t3, t4; \
    49  	SRD    $2, t2;      \
    50  	ADDZE  h2;          \
    51  	OR     t4, t2, t2;  \
    52  	SRD    $2, t3;      \
    53  	ADDC   t2, h0, h0;  \
    54  	ADDE   t3, h1, h1;  \
    55  	ADDZE  h2
    56  
        // poly1305Mask<> is a file-local, 16-byte read-only constant made of two
        // 64-bit words (offset 0 and offset 8). Read as one little-endian 128-bit
        // value they form 0x0ffffffc0ffffffc0ffffffc0fffffff, which is the clamp
        // applied to r in the Poly1305 specification (RFC 8439).
        //
        // NOTE(review): nothing in the code visible in this file references this
        // symbol; update loads r0/r1 from state without masking them, so the
        // clamping presumably happens before update is called -- confirm before
        // relying on or removing this constant.
    57  DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
    58  DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
    59  GLOBL ·poly1305Mask<>(SB), RODATA, $16
    60  
        // update absorbs msg into the Poly1305 accumulator stored in state.
        //
        // Arguments (frame $0-32: no locals, 32 bytes of arguments):
        //   state+0(FP)     pointer to the state words
        //   msg_base+8(FP)  pointer to the first message byte
        //   msg_len+16(FP)  number of message bytes
        //
        // State words used here:
        //   state[0..2] = h0, h1, h2  accumulator, read on entry, written on exit
        //   state[3..4] = r0, r1      multiplier, read only
        //   state[5..6] are not touched by this routine.
        //
        // Register use:
        //   R3        state pointer
        //   R4        current message pointer
        //   R5        bytes remaining
        //   R8-R10    h0, h1, h2
        //   R11, R12  r0, r1
        //   R20-R22   temporaries of POLY1305_ADD
        //   R16-R18, R14, R20, R21  temporaries of POLY1305_MUL
        //   R16, R17  low / high 64 bits of the padded final block (tail path)
        //   R21-R23   scratch in the tail path
        //
        // Flow: every complete 16-byte block is added to h (with the 2^128 bit)
        // and h is multiplied by r. A trailing block of 1..15 bytes is assembled
        // in R17:R16 with a single 1 bit placed just above its last byte, added
        // to h without the 2^128 bit, and sent through the same multiply.
    61  // func update(state *[7]uint64, msg []byte)
    62  TEXT ·update(SB), $0-32
    63  	MOVD state+0(FP), R3
    64  	MOVD msg_base+8(FP), R4
    65  	MOVD msg_len+16(FP), R5
    66  
    67  	MOVD 0(R3), R8   // h0
    68  	MOVD 8(R3), R9   // h1
    69  	MOVD 16(R3), R10 // h2
    70  	MOVD 24(R3), R11 // r0
    71  	MOVD 32(R3), R12 // r1
    72  
        	// Fewer than 16 bytes in total: skip the full-block loop.
    73  	CMP R5, $16
    74  	BLT bytes_between_0_and_15
    75  
    76  loop:
        	// h += block + 2^128; R4 advances by 16.
    77  	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)
    78  
    79  multiply:
        	// h = h * r with the 2^130 - 5 fold. Also entered from carry below
        	// for the padded final block.
    80  	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
    81  	ADD $-16, R5
    82  	CMP R5, $16
    83  	BGE loop
    84  
    85  bytes_between_0_and_15:
        	// Nothing left: store h and return. Otherwise start building the
        	// padded final block in R17:R16.
    86  	CMP  R5, $0
    87  	BEQ  done
    88  	MOVD $0, R16 // h0
    89  	MOVD $0, R17 // h1
    90  
        // flush_buffer is reached only by fall-through in the code visible here.
    91  flush_buffer:
        	// 8 or fewer bytes left: only the low word needs message bytes.
    92  	CMP R5, $8
    93  	BLE just1
    94  
        	// R21 = R5 - 8 = number of bytes that belong to the high word.
    95  	MOVD $8, R21
    96  	SUB  R21, R5, R21
    97  
    98  	// Greater than 8 -- load the rightmost remaining bytes in msg
    99  	// and put into R17 (h1)
        	// The load covers the last 8 bytes of msg (offset R5-8), so it stays
        	// inside the message and overlaps the tail of the low word.
   100  	MOVD (R4)(R21), R17
   101  	MOVD $16, R22
   102  
   103  	// Find the offset to those bytes
        	// R22 = (16 - R5) * 8 = number of overlapping low-word bits to drop.
   104  	SUB R5, R22, R22
   105  	SLD $3, R22
   106  
   107  	// Shift to get only the bytes in msg
   108  	SRD R22, R17, R17
   109  
   110  	// Put 1 at high end
        	// R23 = 1 << ((R5 - 8) * 8): the padding bit right above the last
        	// message byte in the high word.
   111  	MOVD $1, R23
   112  	SLD  $3, R21
   113  	SLD  R21, R23, R23
   114  	OR   R23, R17, R17
   115  
   116  	// Remainder is 8
   117  	MOVD $8, R5
   118  
   119  just1:
   120  	CMP R5, $8
   121  	BLT less8
   122  
   123  	// Exactly 8
        	// The low word is the next 8 message bytes, taken whole.
   124  	MOVD (R4), R16
   125  
   126  	CMP R17, $0
   127  
   128  	// Check if we've already set R17; if not
   129  	// set 1 to indicate end of msg.
   130  	BNE  carry
   131  	MOVD $1, R17
   132  	BR   carry
   133  
   134  less8:
        	// 1..7 bytes left: assemble them into R16 with 4-, 2- and 1-byte
        	// zero-extending loads. R22 tracks the bit position at which the next
        	// piece (and finally the padding bit) is inserted. R17 stays 0.
   135  	MOVD  $0, R16   // h0
   136  	MOVD  $0, R22   // shift count
   137  	CMP   R5, $4
   138  	BLT   less4
   139  	MOVWZ (R4), R16
   140  	ADD   $4, R4
   141  	ADD   $-4, R5
   142  	MOVD  $32, R22
   143  
   144  less4:
        	// Two or three bytes left: take a halfword at the current position.
   145  	CMP   R5, $2
   146  	BLT   less2
   147  	MOVHZ (R4), R21
   148  	SLD   R22, R21, R21
   149  	OR    R16, R21, R16
   150  	ADD   $16, R22
   151  	ADD   $-2, R5
   152  	ADD   $2, R4
   153  
   154  less2:
        	// At most one byte left at this point.
   155  	CMP   R5, $0
   156  	BEQ   insert1
   157  	MOVBZ (R4), R21
   158  	SLD   R22, R21, R21
   159  	OR    R16, R21, R16
   160  	ADD   $8, R22
   161  
   162  insert1:
   163  	// Insert 1 at end of msg
   164  	MOVD $1, R21
   165  	SLD  R22, R21, R21
   166  	OR   R16, R21, R16
   167  
   168  carry:
   169  	// Add new values to h0, h1, h2
        	// h2 receives only the carry out of h1: no 2^128 bit is added for a
        	// partial block. R5 is then set to 16 so that the ADD $-16 after
        	// multiply leaves 0 remaining and control falls through to done.
   170  	ADDC  R16, R8
   171  	ADDE  R17, R9
   172  	ADDZE R10, R10
   173  	MOVD  $16, R5
   174  	ADD   R5, R4
   175  	BR    multiply
   176  
   177  done:
   178  	// Save h0, h1, h2 in state
   179  	MOVD R8, 0(R3)
   180  	MOVD R9, 8(R3)
   181  	MOVD R10, 16(R3)
   182  	RET