// Source: github.com/Psiphon-Labs/psiphon-tunnel-core@v2.0.28+incompatible/psiphon/common/crypto/internal/poly1305/sum_s390x.go

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego
// +build gc,!purego

package poly1305

import (
	"golang.org/x/sys/cpu"
)

// updateVX is an assembly implementation of Poly1305 that uses vector
// instructions. It must only be called if the vector facility (vx) is
// available; callers in this file check cpu.S390X.HasVX before calling it.
//
// The declaration has no Go body because the implementation is supplied in
// assembly.
//
//go:noescape
func updateVX(state *macState, msg []byte)

// mac is a replacement for macGeneric that uses a larger buffer and redirects
// calls that would have gone to updateGeneric to updateVX if the vector
// facility is installed.
//
// A larger buffer is required for good performance because the vector
// implementation has a higher fixed cost per call than the generic
// implementation.
27 type mac struct { 28 macState 29 30 buffer [16 * TagSize]byte // size must be a multiple of block size (16) 31 offset int 32 } 33 34 func (h *mac) Write(p []byte) (int, error) { 35 nn := len(p) 36 if h.offset > 0 { 37 n := copy(h.buffer[h.offset:], p) 38 if h.offset+n < len(h.buffer) { 39 h.offset += n 40 return nn, nil 41 } 42 p = p[n:] 43 h.offset = 0 44 if cpu.S390X.HasVX { 45 updateVX(&h.macState, h.buffer[:]) 46 } else { 47 updateGeneric(&h.macState, h.buffer[:]) 48 } 49 } 50 51 tail := len(p) % len(h.buffer) // number of bytes to copy into buffer 52 body := len(p) - tail // number of bytes to process now 53 if body > 0 { 54 if cpu.S390X.HasVX { 55 updateVX(&h.macState, p[:body]) 56 } else { 57 updateGeneric(&h.macState, p[:body]) 58 } 59 } 60 h.offset = copy(h.buffer[:], p[body:]) // copy tail bytes - can be 0 61 return nn, nil 62 } 63 64 func (h *mac) Sum(out *[TagSize]byte) { 65 state := h.macState 66 remainder := h.buffer[:h.offset] 67 68 // Use the generic implementation if we have 2 or fewer blocks left 69 // to sum. The vector implementation has a higher startup time. 70 if cpu.S390X.HasVX && len(remainder) > 2*TagSize { 71 updateVX(&state, remainder) 72 } else if len(remainder) > 0 { 73 updateGeneric(&state, remainder) 74 } 75 finalize(out, &state.h, &state.s) 76 }