github.com/emmansun/gmsm@v0.29.1/sm3/kdf_mult4_asm.go (about)

     1  // Copyright 2024 Sun Yimin. All rights reserved.
     2  // Use of this source code is governed by a MIT-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build (amd64 || arm64 || s390x || ppc64 || ppc64le) && !purego
     6  
     7  package sm3
     8  
     9  import "encoding/binary"
    10  
    11  // prepare data template: remaining data + [ct] + padding + length
    12  // p will be 1 or 2 blocks according to the length of remaining data
    13  func prepareInitData(baseMD *digest, p []byte, len, lenStart uint64) {
    14  	if baseMD.nx > 0 {
    15  		copy(p, baseMD.x[:baseMD.nx])
    16  	}
    17  	// Padding. Add a 1 bit and 0 bits until 56 bytes mod 64.
    18  	var tmp [64 + 8]byte // padding + length buffer
    19  	tmp[0] = 0x80
    20  	padlen := tmp[:lenStart+8]
    21  	binary.BigEndian.PutUint64(padlen[lenStart:], len)
    22  	copy(p[baseMD.nx+4:], padlen)
    23  }
    24  
    25  // p || state || words
    26  // p = 64 * 4 * 2 = 512
    27  // state = 8 * 16 = 128
    28  // words = 68 * 16 = 1088
    29  const preallocSizeBy4 = 1728
    30  
    31  const parallelSize4 = 4
    32  
    33  func kdfBy4(baseMD *digest, keyLen int, limit int) []byte {
    34  	if limit < 4 {
    35  		return kdfGeneric(baseMD, keyLen, limit)
    36  	}
    37  	
    38  	var t uint64
    39  	blocks := 1
    40  	len := baseMD.len + 4
    41  	remainlen := len % 64
    42  	if remainlen < 56 {
    43  		t = 56 - remainlen
    44  	} else {
    45  		t = 64 + 56 - remainlen
    46  		blocks = 2
    47  	}
    48  	len <<= 3
    49  	// prepare temporary buffer
    50  	tmpStart := parallelSize4 * blocks * BlockSize
    51  	buffer := make([]byte, preallocSizeBy4)
    52  	tmp := buffer[tmpStart:]
    53  	// prepare processing data
    54  	var dataPtrs [parallelSize4]*byte
    55  	var data [parallelSize4][]byte
    56  	var digs [parallelSize4]*[8]uint32
    57  	var states [parallelSize4][8]uint32
    58  	
    59  	for j := 0; j < parallelSize4; j++ {
    60  		digs[j] = &states[j]
    61  		p := buffer[blocks*BlockSize*j:]
    62  		data[j] = p
    63  		dataPtrs[j] = &p[0]
    64  		if j == 0 {
    65  			prepareInitData(baseMD, p, len, t)
    66  		} else {
    67  			copy(p, data[0])
    68  		}
    69  	}
    70  
    71  	var ct uint32 = 1
    72  	k := make([]byte, limit*Size)
    73  	ret := k
    74  	times := limit / parallelSize4
    75  	for i := 0; i < times; i++ {
    76  		for j := 0; j < parallelSize4; j++ {
    77  			// prepare states
    78  			states[j] = baseMD.h
    79  			// prepare data
    80  			binary.BigEndian.PutUint32(data[j][baseMD.nx:], ct)
    81  			ct++
    82  		}
    83  		blockMultBy4(&digs[0], &dataPtrs[0], &tmp[0], blocks)
    84  		copyResultsBy4(&states[0][0], &ret[0])
    85  		ret = ret[Size*parallelSize4:]
    86  	}
    87  	remain := limit % parallelSize4
    88  	for i := 0; i < remain; i++ {
    89  		binary.BigEndian.PutUint32(tmp[:], ct)
    90  		md := *baseMD
    91  		md.Write(tmp[:4])
    92  		h := md.checkSum()
    93  		copy(ret[i*Size:], h[:])
    94  		ct++
    95  	}
    96  
    97  	return k[:keyLen]
    98  }
    99  
// blockMultBy4 is declared without a body, so its implementation lives outside
// this file (presumably per-architecture assembly — confirm against the
// matching .s files).
//
// As called from kdfBy4: dig points to the first of four state pointers, p to
// the first of four message pointers, buffer to the scratch area that follows
// the message lanes, and blocks is the number of BlockSize-byte blocks in each
// message.
// NOTE(review): presumably it compresses the four messages in parallel and
// leaves the results in the states referenced by dig — verify in the assembly.
//
//go:noescape
func blockMultBy4(dig **[8]uint32, p **byte, buffer *byte, blocks int)
   102  
// copyResultsBy4 is declared without a body; its implementation lives outside
// this file.
//
// kdfBy4 calls it with dig pointing at the first word of its four consecutive
// [8]uint32 states and p pointing at the current output position, and then
// advances the output by Size*parallelSize4 bytes.
// NOTE(review): presumably it serializes the four states into p as big-endian
// bytes — confirm against the implementation.
//
//go:noescape
func copyResultsBy4(dig *uint32, p *byte)