github.com/emmansun/gmsm@v0.29.1/sm3/kdf_mult8_amd64.go (about)

     1  // Copyright 2024 Sun Yimin. All rights reserved.
     2  // Use of this source code is governed by a MIT-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !purego
     6  
     7  package sm3
     8  
     9  import "encoding/binary"
    10  
    11  // p || state || words
    12  // p = 64 * 8 * 2 = 1024
    13  // state = 8 * 32 = 256
    14  // words = 68 * 32 = 2176
    15  const preallocSizeBy8 = 3456
    16  
    17  const parallelSize8 = 8
    18  
    19  func kdfBy8(baseMD *digest, keyLen int, limit int) []byte {
    20  	var t uint64
    21  	blocks := 1
    22  	len := baseMD.len + 4
    23  	remainlen := len % 64
    24  	if remainlen < 56 {
    25  		t = 56 - remainlen
    26  	} else {
    27  		t = 64 + 56 - remainlen
    28  		blocks = 2
    29  	}
    30  	len <<= 3
    31  
    32  	var ct uint32 = 1
    33  	k := make([]byte, limit * Size)
    34  	ret := k
    35  
    36  	// prepare temporary buffer
    37  	tmpStart := parallelSize8 * blocks * BlockSize
    38  	buffer := make([]byte, preallocSizeBy8)
    39  	tmp := buffer[tmpStart:]
    40  	// prepare processing data
    41  	var dataPtrs [parallelSize8]*byte
    42  	var data [parallelSize8][]byte
    43  	var digs [parallelSize8]*[8]uint32
    44  	var states [parallelSize8][8]uint32
    45  	
    46  	for j := 0; j < parallelSize8; j++ {
    47  		digs[j] = &states[j]
    48  		p := buffer[blocks*BlockSize*j:]
    49  		data[j] = p
    50  		dataPtrs[j] = &p[0]
    51  		if j == 0 {
    52  			prepareInitData(baseMD, p, len, t)
    53  		} else {
    54  			copy(p, data[0])
    55  		}
    56  	}
    57  	
    58  	times := limit / parallelSize8
    59  	for i := 0; i < times; i++ {
    60  		for j := 0; j < parallelSize8; j++ {
    61  			// prepare states
    62  			states[j] = baseMD.h
    63  			// prepare data
    64  			binary.BigEndian.PutUint32(data[j][baseMD.nx:], ct)
    65  			ct++
    66  		}
    67  		blockMultBy8(&digs[0], &dataPtrs[0], &tmp[0], blocks)
    68  		copyResultsBy8(&states[0][0], &ret[0])
    69  		ret = ret[Size*parallelSize8:]
    70  	}
    71  
    72  	remain := limit % parallelSize8
    73  	if remain >= parallelSize4 {
    74  		for j := 0; j < parallelSize4; j++ {
    75  			// prepare states
    76  			states[j] = baseMD.h
    77  			// prepare data
    78  			binary.BigEndian.PutUint32(data[j][baseMD.nx:], ct)
    79  			ct++
    80  		}
    81  		blockMultBy4(&digs[0], &dataPtrs[0], &tmp[0], blocks)
    82  		copyResultsBy4(&states[0][0], &ret[0])
    83  		ret = ret[Size*parallelSize4:]
    84  		remain -= parallelSize4
    85  	}
    86  
    87  	for i := 0; i < remain; i++ {
    88  		binary.BigEndian.PutUint32(tmp[:], ct)
    89  		md := *baseMD
    90  		md.Write(tmp[:4])
    91  		h := md.checkSum()
    92  		copy(ret[i*Size:], h[:])
    93  		ct++
    94  	}
    95  
    96  	return k[:keyLen]
    97  }
    98  
// blockMultBy8 has no Go body; its implementation lives outside this file.
//
// kdfBy8 calls it with dig pointing at the first of parallelSize8 state
// pointers, p pointing at the first of parallelSize8 message pointers, buffer
// pointing at the scratch area that follows the messages, and blocks set to
// the number of blocks per message (1 or 2).
//
// NOTE(review): presumably processes `blocks` blocks for all eight lanes and
// updates the pointed-to states in place; confirm against the assembly.
//
//go:noescape
func blockMultBy8(dig **[8]uint32, p **byte, buffer *byte, blocks int)
   101  
// transposeMatrix8x8 has no Go body; its implementation lives outside this
// file, and nothing in this file calls it.
//
// NOTE(review): going by the name and signature, it presumably transposes in
// place the 8x8 matrix of uint32 words reached through the eight pointers
// starting at dig; confirm against the assembly.
//
//go:noescape
func transposeMatrix8x8(dig **[8]uint32)
   104  
// copyResultsBy8 has no Go body; its implementation lives outside this file.
//
// kdfBy8 calls it with dig pointing at the first word of the first of
// parallelSize8 contiguous states and p pointing at the current output
// position, then advances the output by Size*parallelSize8 bytes.
//
// NOTE(review): presumably serialises the eight states as consecutive
// Size-byte digests starting at p; confirm byte order against the assembly.
//
//go:noescape
func copyResultsBy8(dig *uint32, p *byte)