github.com/psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/crypto/internal/poly1305/sum_s390x.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build gc && !purego
     6  // +build gc,!purego
     7  
     8  package poly1305
     9  
    10  import (
    11  	"golang.org/x/sys/cpu"
    12  )
    13  
// updateVX is an assembly implementation of Poly1305 that uses vector
// instructions. It must only be called if the vector facility (vx) is
// available; the callers in this file check cpu.S390X.HasVX before
// dispatching to it.
//
// It folds msg into the running MAC state pointed to by state. The
// declaration has no Go body because the implementation is supplied in
// assembly.
//
//go:noescape
func updateVX(state *macState, msg []byte)
    19  
// mac is a replacement for macGeneric that uses a larger buffer and redirects
// calls that would have gone to updateGeneric to updateVX if the vector
// facility is installed.
//
// A larger buffer is required for good performance because the vector
// implementation has a higher fixed cost per call than the generic
// implementation.
type mac struct {
	// macState is the running Poly1305 state that Write updates in place and
	// that Sum copies before finalizing.
	macState

	// buffer holds bytes that have been written but not yet processed.
	buffer [16 * TagSize]byte // size must be a multiple of block size (16)
	// offset is the number of pending bytes at the start of buffer. Write
	// flushes the buffer as soon as it fills, so offset stays below
	// len(buffer) between calls.
	offset int
}
    33  
    34  func (h *mac) Write(p []byte) (int, error) {
    35  	nn := len(p)
    36  	if h.offset > 0 {
    37  		n := copy(h.buffer[h.offset:], p)
    38  		if h.offset+n < len(h.buffer) {
    39  			h.offset += n
    40  			return nn, nil
    41  		}
    42  		p = p[n:]
    43  		h.offset = 0
    44  		if cpu.S390X.HasVX {
    45  			updateVX(&h.macState, h.buffer[:])
    46  		} else {
    47  			updateGeneric(&h.macState, h.buffer[:])
    48  		}
    49  	}
    50  
    51  	tail := len(p) % len(h.buffer) // number of bytes to copy into buffer
    52  	body := len(p) - tail          // number of bytes to process now
    53  	if body > 0 {
    54  		if cpu.S390X.HasVX {
    55  			updateVX(&h.macState, p[:body])
    56  		} else {
    57  			updateGeneric(&h.macState, p[:body])
    58  		}
    59  	}
    60  	h.offset = copy(h.buffer[:], p[body:]) // copy tail bytes - can be 0
    61  	return nn, nil
    62  }
    63  
    64  func (h *mac) Sum(out *[TagSize]byte) {
    65  	state := h.macState
    66  	remainder := h.buffer[:h.offset]
    67  
    68  	// Use the generic implementation if we have 2 or fewer blocks left
    69  	// to sum. The vector implementation has a higher startup time.
    70  	if cpu.S390X.HasVX && len(remainder) > 2*TagSize {
    71  		updateVX(&state, remainder)
    72  	} else if len(remainder) > 0 {
    73  		updateGeneric(&state, remainder)
    74  	}
    75  	finalize(out, &state.h, &state.s)
    76  }