github.com/emmansun/gmsm@v0.29.1/sm3/kdf_mult4_asm.go (about) 1 // Copyright 2024 Sun Yimin. All rights reserved. 2 // Use of this source code is governed by a MIT-style 3 // license that can be found in the LICENSE file. 4 5 //go:build (amd64 || arm64 || s390x || ppc64 || ppc64le) && !purego 6 7 package sm3 8 9 import "encoding/binary" 10 11 // prepare data template: remaining data + [ct] + padding + length 12 // p will be 1 or 2 blocks according to the length of remaining data 13 func prepareInitData(baseMD *digest, p []byte, len, lenStart uint64) { 14 if baseMD.nx > 0 { 15 copy(p, baseMD.x[:baseMD.nx]) 16 } 17 // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. 18 var tmp [64 + 8]byte // padding + length buffer 19 tmp[0] = 0x80 20 padlen := tmp[:lenStart+8] 21 binary.BigEndian.PutUint64(padlen[lenStart:], len) 22 copy(p[baseMD.nx+4:], padlen) 23 } 24 25 // p || state || words 26 // p = 64 * 4 * 2 = 512 27 // state = 8 * 16 = 128 28 // words = 68 * 16 = 1088 29 const preallocSizeBy4 = 1728 30 31 const parallelSize4 = 4 32 33 func kdfBy4(baseMD *digest, keyLen int, limit int) []byte { 34 if limit < 4 { 35 return kdfGeneric(baseMD, keyLen, limit) 36 } 37 38 var t uint64 39 blocks := 1 40 len := baseMD.len + 4 41 remainlen := len % 64 42 if remainlen < 56 { 43 t = 56 - remainlen 44 } else { 45 t = 64 + 56 - remainlen 46 blocks = 2 47 } 48 len <<= 3 49 // prepare temporary buffer 50 tmpStart := parallelSize4 * blocks * BlockSize 51 buffer := make([]byte, preallocSizeBy4) 52 tmp := buffer[tmpStart:] 53 // prepare processing data 54 var dataPtrs [parallelSize4]*byte 55 var data [parallelSize4][]byte 56 var digs [parallelSize4]*[8]uint32 57 var states [parallelSize4][8]uint32 58 59 for j := 0; j < parallelSize4; j++ { 60 digs[j] = &states[j] 61 p := buffer[blocks*BlockSize*j:] 62 data[j] = p 63 dataPtrs[j] = &p[0] 64 if j == 0 { 65 prepareInitData(baseMD, p, len, t) 66 } else { 67 copy(p, data[0]) 68 } 69 } 70 71 var ct uint32 = 1 72 k := make([]byte, limit*Size) 73 ret := k 74 times := limit / parallelSize4 75 for i := 0; i < times; i++ { 76 for j := 0; j < parallelSize4; j++ { 77 // prepare states 78 states[j] = baseMD.h 79 // prepare data 80 binary.BigEndian.PutUint32(data[j][baseMD.nx:], ct) 81 ct++ 82 } 83 blockMultBy4(&digs[0], &dataPtrs[0], &tmp[0], blocks) 84 copyResultsBy4(&states[0][0], &ret[0]) 85 ret = ret[Size*parallelSize4:] 86 } 87 remain := limit % parallelSize4 88 for i := 0; i < remain; i++ { 89 binary.BigEndian.PutUint32(tmp[:], ct) 90 md := *baseMD 91 md.Write(tmp[:4]) 92 h := md.checkSum() 93 copy(ret[i*Size:], h[:]) 94 ct++ 95 } 96 97 return k[:keyLen] 98 } 99 100 //go:noescape 101 func blockMultBy4(dig **[8]uint32, p **byte, buffer *byte, blocks int) 102 103 //go:noescape 104 func copyResultsBy4(dig *uint32, p *byte)