// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Based on the Linux Kernel with the following comment:
// Algorithm based on https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fb87127bcefc17efab757606e1b1e333fd614dd0
// Originally written by Ard Biesheuvel <ard.biesheuvel@linaro.org>

#include "textflag.h"

// Register conventions shared by the macros below and by blockAsm:
//
//	R4       cursor into the round-constant table; every VLD1.P inside a
//	         macro advances it by 16 bytes
//	V0-V4    working hash state; the i0..i4 macro arguments name these five
//	         registers in a different order on every invocation
//	V5-V7    scratch, overwritten by every macro invocation
//	V12-V19  message schedule (the in* arguments)
//	V20-V31  round constants (the rc* arguments)
//
// Every macro operates on .D2 vectors, i.e. on two 64-bit lanes at once.
// Comments are kept outside the macro bodies on purpose: a // comment on a
// continued line would swallow the trailing backslash.

// SHA512TRANS is the common prologue of all round macros.
// It forms in0+rc0 in V5 and swaps its two 64-bit halves, builds V6 and V7
// with 8-byte VEXT extracts over the i3/i2 and i2/i1 register pairs, and
// accumulates V5 into i3. i0 and i4 are not referenced here; they are accepted
// so that every macro takes the same leading argument list.
#define SHA512TRANS(i0, i1, i2, i3, i4, rc0, in0) \
	VADD	in0.D2, rc0.D2, V5.D2 \
	VEXT	$8, i3.B16, i2.B16, V6.B16 \
	VEXT	$8, V5.B16, V5.B16, V5.B16 \
	VEXT	$8, i2.B16, i1.B16, V7.B16 \
	VADD	V5.D2, i3.D2, i3.D2

// SHA512ROUND is the full round step:
//   - loads the next 16 bytes of round constants from (R4) into rc1 for use
//     by a later invocation,
//   - runs SHA512TRANS,
//   - updates the message-schedule register in0 in place with
//     SHA512SU0/SHA512SU1, using in1, in2 and a VEXT of in4/in3 held in V5,
//   - updates the state with SHA512H/SHA512H2, writing i1+i3 to i4 in between.
#define SHA512ROUND(i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4) \
	VLD1.P	16(R4), [rc1.D2] \
	SHA512TRANS(i0, i1, i2, i3, i4, rc0, in0) \
	VEXT	$8, in4.B16, in3.B16, V5.B16 \
	SHA512SU0	in1.D2, in0.D2 \
	SHA512H	V7.D2, V6, i3 \
	SHA512SU1	V5.D2, in2.D2, in0.D2 \
	VADD	i3.D2, i1.D2, i4.D2 \
	SHA512H2	i0.D2, i1, i3

// SHA512ROUND_NO_UPDATE is SHA512ROUND without the message-schedule update:
// it still loads the next round constants into rc1, but leaves in0 unchanged.
#define SHA512ROUND_NO_UPDATE(i0, i1, i2, i3, i4, rc0, rc1, in0) \
	VLD1.P	16(R4), [rc1.D2] \
	SHA512TRANS(i0, i1, i2, i3, i4, rc0, in0) \
	SHA512H	V7.D2, V6, i3 \
	VADD	i3.D2, i1.D2, i4.D2 \
	SHA512H2	i0.D2, i1, i3

// SHA512ROUND_LAST is SHA512ROUND_NO_UPDATE without the constant load; it
// does not touch R4.
#define SHA512ROUND_LAST(i0, i1, i2, i3, i4, rc0, in0) \
	SHA512TRANS(i0, i1, i2, i3, i4, rc0, in0) \
	SHA512H	V7.D2, V6, i3 \
	VADD	i3.D2, i1.D2, i4.D2 \
	SHA512H2	i0.D2, i1, i3

// func blockAsm(dig *digest, p []byte)
//
// blockAsm folds p into the hash state at dig, 128 bytes per loop iteration.
// The first 64 bytes of *dig are read as the state on entry and written back
// on exit. Only whole 128-byte blocks are consumed: trailing bytes of p are
// ignored, and if p holds less than one block *dig is neither read nor
// written.
//
// Registers:
//
//	R0       dig
//	R1       read cursor in p, advanced by the post-indexed loads
//	R2       bytes left to process, always a multiple of 128
//	R3       address of the round-constant table
//	R4       per-block cursor into that table
//	V8-V11   running digest
//	V0-V4    working state for the current block
//	V5-V7    macro scratch
//	V12-V19  message schedule
//	V20-V31  round constants
TEXT ·blockAsm(SB),NOSPLIT,$0
	MOVD	dig+0(FP), R0
	MOVD	p_base+8(FP), R1
	MOVD	p_len+16(FP), R2

	// Round the length down to whole blocks and leave if nothing remains.
	// The loop below stops only when R2 reaches exactly zero, so without this
	// an empty or ragged p would send it past the end of the buffer.
	AND	$~127, R2
	CBZ	R2, done

	// R3 = the word stored at ·_K, used from here on as the address of the
	// round constants (presumably the data pointer of a Go slice; confirm
	// against the declaration of _K).
	MOVD	·_K+0(SB), R3

	// Start pulling the constant table into cache; the digest load and the
	// message load give the prefetch time to complete.
	PRFM	(R3), PLDL3KEEP
	// Load the 64-byte digest.
	VLD1	(R0), [V8.D2, V9.D2, V10.D2, V11.D2]

loop:
	// Copy the digest into V0-V3, keeping the original in V8-V11.
	VMOV	V8.B16, V0.B16
	VMOV	V9.B16, V1.B16
	VMOV	V10.B16, V2.B16
	VMOV	V11.B16, V3.B16

	// Load one 128-byte message block into V12-V19.
	VLD1.P	64(R1), [V12.D2, V13.D2, V14.D2, V15.D2]
	VLD1.P	64(R1), [V16.D2, V17.D2, V18.D2, V19.D2]

	// Reverse the bytes of every 64-bit word: the message is big-endian.
	VREV64	V12.B16, V12.B16
	VREV64	V13.B16, V13.B16
	VREV64	V14.B16, V14.B16
	VREV64	V15.B16, V15.B16
	VREV64	V16.B16, V16.B16
	VREV64	V17.B16, V17.B16
	VREV64	V18.B16, V18.B16
	VREV64	V19.B16, V19.B16

	// Rewind the constant cursor and preload the first four constant
	// registers (V20-V23). The macros stream the rest through V24-V31 and
	// then back around through V24-V27.
	MOVD	R3, R4
	VLD1.P	64(R4), [V20.D2, V21.D2, V22.D2, V23.D2]

	// 40 macro invocations follow, each covering two 64-bit lanes. The
	// 4 preloaded constant registers plus the 36 in-macro loads supply one
	// constant register per invocation.
	SHA512ROUND(V0, V1, V2, V3, V4, V20, V24, V12, V13, V19, V16, V17)
	SHA512ROUND(V3, V0, V4, V2, V1, V21, V25, V13, V14, V12, V17, V18)
	SHA512ROUND(V2, V3, V1, V4, V0, V22, V26, V14, V15, V13, V18, V19)
	SHA512ROUND(V4, V2, V0, V1, V3, V23, V27, V15, V16, V14, V19, V12)
	SHA512ROUND(V1, V4, V3, V0, V2, V24, V28, V16, V17, V15, V12, V13)

	SHA512ROUND(V0, V1, V2, V3, V4, V25, V29, V17, V18, V16, V13, V14)
	SHA512ROUND(V3, V0, V4, V2, V1, V26, V30, V18, V19, V17, V14, V15)
	SHA512ROUND(V2, V3, V1, V4, V0, V27, V31, V19, V12, V18, V15, V16)
	SHA512ROUND(V4, V2, V0, V1, V3, V28, V24, V12, V13, V19, V16, V17)
	SHA512ROUND(V1, V4, V3, V0, V2, V29, V25, V13, V14, V12, V17, V18)

	SHA512ROUND(V0, V1, V2, V3, V4, V30, V26, V14, V15, V13, V18, V19)
	SHA512ROUND(V3, V0, V4, V2, V1, V31, V27, V15, V16, V14, V19, V12)
	SHA512ROUND(V2, V3, V1, V4, V0, V24, V28, V16, V17, V15, V12, V13)
	SHA512ROUND(V4, V2, V0, V1, V3, V25, V29, V17, V18, V16, V13, V14)
	SHA512ROUND(V1, V4, V3, V0, V2, V26, V30, V18, V19, V17, V14, V15)

	SHA512ROUND(V0, V1, V2, V3, V4, V27, V31, V19, V12, V18, V15, V16)
	SHA512ROUND(V3, V0, V4, V2, V1, V28, V24, V12, V13, V19, V16, V17)
	SHA512ROUND(V2, V3, V1, V4, V0, V29, V25, V13, V14, V12, V17, V18)
	SHA512ROUND(V4, V2, V0, V1, V3, V30, V26, V14, V15, V13, V18, V19)
	SHA512ROUND(V1, V4, V3, V0, V2, V31, V27, V15, V16, V14, V19, V12)

	SHA512ROUND(V0, V1, V2, V3, V4, V24, V28, V16, V17, V15, V12, V13)
	SHA512ROUND(V3, V0, V4, V2, V1, V25, V29, V17, V18, V16, V13, V14)
	SHA512ROUND(V2, V3, V1, V4, V0, V26, V30, V18, V19, V17, V14, V15)
	SHA512ROUND(V4, V2, V0, V1, V3, V27, V31, V19, V12, V18, V15, V16)
	SHA512ROUND(V1, V4, V3, V0, V2, V28, V24, V12, V13, V19, V16, V17)

	SHA512ROUND(V0, V1, V2, V3, V4, V29, V25, V13, V14, V12, V17, V18)
	SHA512ROUND(V3, V0, V4, V2, V1, V30, V26, V14, V15, V13, V18, V19)
	SHA512ROUND(V2, V3, V1, V4, V0, V31, V27, V15, V16, V14, V19, V12)
	SHA512ROUND(V4, V2, V0, V1, V3, V24, V28, V16, V17, V15, V12, V13)
	SHA512ROUND(V1, V4, V3, V0, V2, V25, V29, V17, V18, V16, V13, V14)

	SHA512ROUND(V0, V1, V2, V3, V4, V26, V30, V18, V19, V17, V14, V15)
	SHA512ROUND(V3, V0, V4, V2, V1, V27, V31, V19, V12, V18, V15, V16)

	// No further message-schedule updates; constants are still loaded.
	SHA512ROUND_NO_UPDATE(V2, V3, V1, V4, V0, V28, V24, V12)
	SHA512ROUND_NO_UPDATE(V4, V2, V0, V1, V3, V29, V25, V13)
	SHA512ROUND_NO_UPDATE(V1, V4, V3, V0, V2, V30, V26, V14)
	SHA512ROUND_NO_UPDATE(V0, V1, V2, V3, V4, V31, V27, V15)

	// All constants are already in V24-V27; no more loads.
	SHA512ROUND_LAST(V3, V0, V4, V2, V1, V24, V16)
	SHA512ROUND_LAST(V2, V3, V1, V4, V0, V25, V17)
	SHA512ROUND_LAST(V4, V2, V0, V1, V3, V26, V18)
	SHA512ROUND_LAST(V1, V4, V3, V0, V2, V27, V19)

	// Add the block's result into the running digest.
	VADD	V0.D2, V8.D2, V8.D2
	VADD	V1.D2, V9.D2, V9.D2
	VADD	V2.D2, V10.D2, V10.D2
	VADD	V3.D2, V11.D2, V11.D2

	// Next block, if any. R2 is a multiple of 128, so it reaches zero exactly.
	SUB	$128, R2
	CBNZ	R2, loop

	// Write the updated digest back.
	VST1	[V8.D2, V9.D2, V10.D2, V11.D2], (R0)

done:
	RET