gitee.com/zhaochuninhefei/gmgo@v0.0.31-0.20240209061119-069254a02979/sm4/gcm_cipher_asm.go (about) 1 //go:build amd64 || arm64 2 // +build amd64 arm64 3 4 package sm4 5 6 import ( 7 "crypto/cipher" 8 goSubtle "crypto/subtle" 9 "encoding/binary" 10 "errors" 11 12 "gitee.com/zhaochuninhefei/gmgo/internal/subtle" 13 "gitee.com/zhaochuninhefei/gmgo/internal/xor" 14 ) 15 16 // Assert that sm4CipherAsm implements the gcmAble interface. 17 var _ gcmAble = (*sm4CipherAsm)(nil) 18 19 // NewGCM returns the SM4 cipher wrapped in Galois Counter Mode. This is only 20 // called by crypto/cipher.NewGCM via the gcmAble interface. 21 func (sm4c *sm4CipherAsm) NewGCM(nonceSize, tagSize int) (cipher.AEAD, error) { 22 // zclog.Debug("sm4.NewGCM in sm4/gcm_cipher_asm.go") 23 var key [gcmBlockSize]byte 24 sm4c.Encrypt(key[:], key[:]) 25 g := &gcm{cipher: sm4c, nonceSize: nonceSize, tagSize: tagSize} 26 // We precompute 16 multiples of |key|. However, when we do lookups 27 // into this table we'll be using bits from a field element and 28 // therefore the bits will be in the reverse order. So normally one 29 // would expect, say, 4*key to be in index 4 of the table but due to 30 // this bit ordering it will actually be in index 0010 (base 2) = 2. 31 x := gcmFieldElement{ 32 binary.BigEndian.Uint64(key[:8]), 33 binary.BigEndian.Uint64(key[8:]), 34 } 35 g.productTable[reverseBits(1)] = x 36 37 for i := 2; i < 16; i += 2 { 38 g.productTable[reverseBits(i)] = gcmDouble(&g.productTable[reverseBits(i/2)]) 39 g.productTable[reverseBits(i+1)] = gcmAdd(&g.productTable[reverseBits(i)], &x) 40 } 41 42 return g, nil 43 } 44 45 // gcmFieldElement represents a value in GF(2¹²⁸). In order to reflect the GCM 46 // standard and make binary.BigEndian suitable for marshaling these values, the 47 // bits are stored in big endian order. For example: 48 // the coefficient of x⁰ can be obtained by v.low >> 63. 49 // the coefficient of x⁶³ can be obtained by v.low & 1. 50 // the coefficient of x⁶⁴ can be obtained by v.high >> 63. 51 // the coefficient of x¹²⁷ can be obtained by v.high & 1. 52 type gcmFieldElement struct { 53 low, high uint64 54 } 55 56 // gcm represents a Galois Counter Mode with a specific key. See 57 // https://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/gcm/gcm-revised-spec.pdf 58 type gcm struct { 59 cipher *sm4CipherAsm 60 nonceSize int 61 tagSize int 62 // productTable contains the first sixteen powers of the key, H. 63 // However, they are in bit reversed order. See NewGCMWithNonceSize. 64 productTable [16]gcmFieldElement 65 } 66 67 const ( 68 gcmBlockSize = 16 69 gcmTagSize = 16 70 gcmMinimumTagSize = 12 // NIST SP 800-38D recommends tags with 12 or more bytes. 71 gcmStandardNonceSize = 12 72 ) 73 74 func (g *gcm) NonceSize() int { 75 return g.nonceSize 76 } 77 78 func (g *gcm) Overhead() int { 79 return g.tagSize 80 } 81 82 func (g *gcm) Seal(dst, nonce, plaintext, data []byte) []byte { 83 // zclog.Debug("sm4.Seal in sm4/gcm_cipher_asm.go") 84 if len(nonce) != g.nonceSize { 85 panic("cipher: incorrect nonce length given to GCM") 86 } 87 if uint64(len(plaintext)) > ((1<<32)-2)*uint64(g.cipher.BlockSize()) { 88 panic("cipher: message too large for GCM") 89 } 90 91 ret, out := subtle.SliceForAppend(dst, len(plaintext)+g.tagSize) 92 if subtle.InexactOverlap(out, plaintext) { 93 panic("cipher: invalid buffer overlap") 94 } 95 96 var counter, tagMask [gcmBlockSize]byte 97 g.deriveCounter(&counter, nonce) 98 99 g.cipher.Encrypt(tagMask[:], counter[:]) 100 gcmInc32(&counter) 101 102 g.counterCrypt(out, plaintext, &counter) 103 104 var tag [gcmTagSize]byte 105 g.auth(tag[:], out[:len(plaintext)], data, &tagMask) 106 copy(out[len(plaintext):], tag[:]) 107 108 return ret 109 } 110 111 var errOpen = errors.New("cipher: message authentication failed") 112 113 func (g *gcm) Open(dst, nonce, ciphertext, data []byte) ([]byte, error) { 114 // zclog.Debug("sm4.Open in sm4/gcm_cipher_asm.go") 115 if len(nonce) != g.nonceSize { 116 panic("cipher: incorrect nonce length given to GCM") 117 } 118 // Sanity check to prevent the authentication from always succeeding if an implementation 119 // leaves tagSize uninitialized, for example. 120 if g.tagSize < gcmMinimumTagSize { 121 panic("cipher: incorrect GCM tag size") 122 } 123 124 if len(ciphertext) < g.tagSize { 125 return nil, errOpen 126 } 127 if uint64(len(ciphertext)) > ((1<<32)-2)*uint64(g.cipher.BlockSize())+uint64(g.tagSize) { 128 return nil, errOpen 129 } 130 131 tag := ciphertext[len(ciphertext)-g.tagSize:] 132 ciphertext = ciphertext[:len(ciphertext)-g.tagSize] 133 134 var counter, tagMask [gcmBlockSize]byte 135 g.deriveCounter(&counter, nonce) 136 137 g.cipher.Encrypt(tagMask[:], counter[:]) 138 gcmInc32(&counter) 139 140 var expectedTag [gcmTagSize]byte 141 g.auth(expectedTag[:], ciphertext, data, &tagMask) 142 143 ret, out := subtle.SliceForAppend(dst, len(ciphertext)) 144 if subtle.InexactOverlap(out, ciphertext) { 145 panic("cipher: invalid buffer overlap") 146 } 147 148 if goSubtle.ConstantTimeCompare(expectedTag[:g.tagSize], tag) != 1 { 149 // The AESNI code decrypts and authenticates concurrently, and 150 // so overwrites dst in the event of a tag mismatch. That 151 // behavior is mimicked here in order to be consistent across 152 // platforms. 153 for i := range out { 154 out[i] = 0 155 } 156 return nil, errOpen 157 } 158 159 g.counterCrypt(out, ciphertext, &counter) 160 161 return ret, nil 162 } 163 164 // reverseBits reverses the order of the bits of 4-bit number in i. 165 func reverseBits(i int) int { 166 i = ((i << 2) & 0xc) | ((i >> 2) & 0x3) 167 i = ((i << 1) & 0xa) | ((i >> 1) & 0x5) 168 return i 169 } 170 171 // gcmAdd adds two elements of GF(2¹²⁸) and returns the sum. 172 func gcmAdd(x, y *gcmFieldElement) gcmFieldElement { 173 // Addition in a characteristic 2 field is just XOR. 174 return gcmFieldElement{x.low ^ y.low, x.high ^ y.high} 175 } 176 177 // gcmDouble returns the result of doubling an element of GF(2¹²⁸). 178 func gcmDouble(x *gcmFieldElement) (double gcmFieldElement) { 179 msbSet := x.high&1 == 1 180 181 // Because of the bit-ordering, doubling is actually a right shift. 182 double.high = x.high >> 1 183 double.high |= x.low << 63 184 double.low = x.low >> 1 185 186 // If the most-significant bit was set before shifting then it, 187 // conceptually, becomes a term of x^128. This is greater than the 188 // irreducible polynomial so the result has to be reduced. The 189 // irreducible polynomial is 1+x+x^2+x^7+x^128. We can subtract that to 190 // eliminate the term at x^128 which also means subtracting the other 191 // four terms. In characteristic 2 fields, subtraction == addition == 192 // XOR. 193 if msbSet { 194 double.low ^= 0xe100000000000000 195 } 196 197 return 198 } 199 200 var gcmReductionTable = []uint16{ 201 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0, 202 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0, 203 } 204 205 // mul sets y to y*H, where H is the GCM key, fixed during NewGCMWithNonceSize. 206 func (g *gcm) mul(y *gcmFieldElement) { 207 var z gcmFieldElement 208 209 for i := 0; i < 2; i++ { 210 word := y.high 211 if i == 1 { 212 word = y.low 213 } 214 215 // Multiplication works by multiplying z by 16 and adding in 216 // one of the precomputed multiples of H. 217 for j := 0; j < 64; j += 4 { 218 msw := z.high & 0xf 219 z.high >>= 4 220 z.high |= z.low << 60 221 z.low >>= 4 222 z.low ^= uint64(gcmReductionTable[msw]) << 48 223 224 // the values in |table| are ordered for 225 // little-endian bit positions. See the comment 226 // in NewGCMWithNonceSize. 227 t := &g.productTable[word&0xf] 228 229 z.low ^= t.low 230 z.high ^= t.high 231 word >>= 4 232 } 233 } 234 235 *y = z 236 } 237 238 // updateBlocks extends y with more polynomial terms from blocks, based on 239 // Horner's rule. There must be a multiple of gcmBlockSize bytes in blocks. 240 func (g *gcm) updateBlocks(y *gcmFieldElement, blocks []byte) { 241 for len(blocks) > 0 { 242 y.low ^= binary.BigEndian.Uint64(blocks) 243 y.high ^= binary.BigEndian.Uint64(blocks[8:]) 244 g.mul(y) 245 blocks = blocks[gcmBlockSize:] 246 } 247 } 248 249 // update extends y with more polynomial terms from data. If data is not a 250 // multiple of gcmBlockSize bytes long then the remainder is zero padded. 251 func (g *gcm) update(y *gcmFieldElement, data []byte) { 252 fullBlocks := (len(data) >> 4) << 4 253 g.updateBlocks(y, data[:fullBlocks]) 254 255 if len(data) != fullBlocks { 256 var partialBlock [gcmBlockSize]byte 257 copy(partialBlock[:], data[fullBlocks:]) 258 g.updateBlocks(y, partialBlock[:]) 259 } 260 } 261 262 // gcmInc32 treats the final four bytes of counterBlock as a big-endian value 263 // and increments it. 264 func gcmInc32(counterBlock *[16]byte) { 265 ctr := counterBlock[len(counterBlock)-4:] 266 binary.BigEndian.PutUint32(ctr, binary.BigEndian.Uint32(ctr)+1) 267 } 268 269 // counterCrypt crypts in to out using g.cipher in counter mode. 270 func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) { 271 mask := make([]byte, g.cipher.blocksSize) 272 counters := make([]byte, g.cipher.blocksSize) 273 274 for len(in) >= g.cipher.blocksSize { 275 for i := 0; i < g.cipher.batchBlocks; i++ { 276 copy(counters[i*gcmBlockSize:(i+1)*gcmBlockSize], counter[:]) 277 gcmInc32(counter) 278 } 279 g.cipher.EncryptBlocks(mask, counters) 280 xor.XorWords(out, in, mask[:]) 281 out = out[g.cipher.blocksSize:] 282 in = in[g.cipher.blocksSize:] 283 } 284 285 if len(in) > 0 { 286 blocks := (len(in) + gcmBlockSize - 1) / gcmBlockSize 287 for i := 0; i < blocks; i++ { 288 copy(counters[i*gcmBlockSize:], counter[:]) 289 gcmInc32(counter) 290 } 291 g.cipher.EncryptBlocks(mask, counters) 292 xor.XorBytes(out, in, mask[:blocks*gcmBlockSize]) 293 } 294 } 295 296 // deriveCounter computes the initial GCM counter state from the given nonce. 297 // See NIST SP 800-38D, section 7.1. This assumes that counter is filled with 298 // zeros on entry. 299 func (g *gcm) deriveCounter(counter *[gcmBlockSize]byte, nonce []byte) { 300 // GCM has two modes of operation with respect to the initial counter 301 // state: a "fast path" for 96-bit (12-byte) nonces, and a "slow path" 302 // for nonces of other lengths. For a 96-bit nonce, the nonce, along 303 // with a four-byte big-endian counter starting at one, is used 304 // directly as the starting counter. For other nonce sizes, the counter 305 // is computed by passing it through the GHASH function. 306 if len(nonce) == gcmStandardNonceSize { 307 copy(counter[:], nonce) 308 counter[gcmBlockSize-1] = 1 309 } else { 310 var y gcmFieldElement 311 g.update(&y, nonce) 312 y.high ^= uint64(len(nonce)) * 8 313 g.mul(&y) 314 binary.BigEndian.PutUint64(counter[:8], y.low) 315 binary.BigEndian.PutUint64(counter[8:], y.high) 316 } 317 } 318 319 // auth calculates GHASH(ciphertext, additionalData), masks the result with 320 // tagMask and writes the result to out. 321 func (g *gcm) auth(out, ciphertext, additionalData []byte, tagMask *[gcmTagSize]byte) { 322 var y gcmFieldElement 323 g.update(&y, additionalData) 324 g.update(&y, ciphertext) 325 326 y.low ^= uint64(len(additionalData)) * 8 327 y.high ^= uint64(len(ciphertext)) * 8 328 329 g.mul(&y) 330 331 binary.BigEndian.PutUint64(out, y.low) 332 binary.BigEndian.PutUint64(out[8:], y.high) 333 334 xor.XorWords(out, out, tagMask[:]) 335 }