github.com/cloudflare/circl@v1.5.0/pke/kyber/internal/common/ntt.go (about) 1 package common 2 3 // Zetas lists precomputed powers of the primitive root of unity in 4 // Montgomery representation used for the NTT: 5 // 6 // Zetas[i] = ζᵇʳᵛ⁽ⁱ⁾ R mod q 7 // 8 // where ζ = 17, brv(i) is the bitreversal of a 7-bit number and R=2¹⁶ mod q. 9 // 10 // The following Python code generates the Zetas arrays: 11 // 12 // q = 13*2**8 + 1; zeta = 17 13 // R = 2**16 % q # Montgomery const. 14 // def brv(x): return int(''.join(reversed(bin(x)[2:].zfill(7))),2) 15 // print([(pow(zeta, brv(i), q)*R)%q for i in range(128)]) 16 var Zetas = [128]int16{ 17 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 18 962, 2127, 1855, 1468, 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 19 2648, 1017, 732, 608, 1787, 411, 3124, 1758, 1223, 652, 2777, 1015, 20 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 21 1469, 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 22 2604, 448, 2264, 677, 2054, 2226, 430, 555, 843, 2078, 871, 1550, 23 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, 3083, 778, 1159, 24 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 25 3254, 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 26 1994, 2455, 220, 2142, 1670, 2144, 1799, 2051, 794, 1819, 2475, 27 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628, 28 } 29 30 // InvNTTReductions keeps track of which coefficients to apply Barrett 31 // reduction to in Poly.InvNTT(). 32 // 33 // Generated in a lazily: once a butterfly is computed which is about to 34 // overflow the int16, the largest coefficient is reduced. If that is 35 // not enough, the other coefficient is reduced as well. 36 // 37 // This is actually optimal, as proven in https://eprint.iacr.org/2020/1377.pdf 38 var InvNTTReductions = [...]int{ 39 -1, // after layer 1 40 -1, // after layer 2 41 16, 17, 48, 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, 209, 240, 42 241, -1, // after layer 3 43 0, 1, 32, 33, 34, 35, 64, 65, 96, 97, 98, 99, 128, 129, 160, 161, 162, 163, 44 192, 193, 224, 225, 226, 227, -1, // after layer 4 45 2, 3, 66, 67, 68, 69, 70, 71, 130, 131, 194, 195, 196, 197, 198, 46 199, -1, // after layer 5 47 4, 5, 6, 7, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 48 143, -1, // after layer 6 49 -1, // after layer 7 50 } 51 52 // Executes an in-place forward "NTT" on p. 53 // 54 // Assumes the coefficients are in absolute value ≤q. The resulting 55 // coefficients are in absolute value ≤7q. If the input is in Montgomery 56 // form, then the result is in Montgomery form and so (by linearity of the NTT) 57 // if the input is in regular form, then the result is also in regular form. 58 // The order of coefficients will be "tangled". These can be put back into 59 // their proper order by calling Detangle(). 60 func (p *Poly) nttGeneric() { 61 // Note that ℤ_q does not have a primitive 512ᵗʰ root of unity (as 512 62 // does not divide into q-1) and so we cannot do a regular NTT. ℤ_q 63 // does have a primitive 256ᵗʰ root of unity, the smallest of which 64 // is ζ := 17. 65 // 66 // Recall that our base ring R := ℤ_q[x] / (x²⁵⁶ + 1). The polynomial 67 // x²⁵⁶+1 will not split completely (as its roots would be 512ᵗʰ roots 68 // of unity.) However, it does split almost (using ζ¹²⁸ = -1): 69 // 70 // x²⁵⁶ + 1 = (x²)¹²⁸ - ζ¹²⁸ 71 // = ((x²)⁶⁴ - ζ⁶⁴)((x²)⁶⁴ + ζ⁶⁴) 72 // = ((x²)³² - ζ³²)((x²)³² + ζ³²)((x²)³² - ζ⁹⁶)((x²)³² + ζ⁹⁶) 73 // ⋮ 74 // = (x² - ζ)(x² + ζ)(x² - ζ⁶⁵)(x² + ζ⁶⁵) … (x² + ζ¹²⁷) 75 // 76 // Note that the powers of ζ that appear (from the second line down) are 77 // in binary 78 // 79 // 0100000 1100000 80 // 0010000 1010000 0110000 1110000 81 // 0001000 1001000 0101000 1101000 0011000 1011000 0111000 1111000 82 // … 83 // 84 // That is: brv(2), brv(3), brv(4), …, where brv(x) denotes the 7-bit 85 // bitreversal of x. These powers of ζ are given by the Zetas array. 86 // 87 // The polynomials x² ± ζⁱ are irreducible and coprime, hence by 88 // the Chinese Remainder Theorem we know 89 // 90 // ℤ_q[x]/(x²⁵⁶+1) → ℤ_q[x]/(x²-ζ) x … x ℤ_q[x]/(x²+ζ¹²⁷) 91 // 92 // given by a ↦ ( a mod x²-ζ, …, a mod x²+ζ¹²⁷ ) 93 // is an isomorphism, which is the "NTT". It can be efficiently computed by 94 // 95 // 96 // a ↦ ( a mod (x²)⁶⁴ - ζ⁶⁴, a mod (x²)⁶⁴ + ζ⁶⁴ ) 97 // ↦ ( a mod (x²)³² - ζ³², a mod (x²)³² + ζ³², 98 // a mod (x²)⁹⁶ - ζ⁹⁶, a mod (x²)⁹⁶ + ζ⁹⁶ ) 99 // 100 // et cetera 101 // 102 // If N was 8 then this can be pictured in the following diagram: 103 // 104 // https://cnx.org/resources/17ee4dfe517a6adda05377b25a00bf6e6c93c334/File0026.png 105 // 106 // Each cross is a Cooley-Tukey butterfly: it's the map 107 // 108 // (a, b) ↦ (a + ζb, a - ζb) 109 // 110 // for the appropriate power ζ for that column and row group. 111 112 k := 0 // Index into Zetas 113 114 // l runs effectively over the columns in the diagram above; it is half the 115 // height of a row group, i.e. the number of butterflies in each row group. 116 // In the diagram above it would be 4, 2, 1. 117 for l := N / 2; l > 1; l >>= 1 { 118 // On the nᵗʰ iteration of the l-loop, the absolute value of the 119 // coefficients are bounded by nq. 120 121 // offset effectively loops over the row groups in this column; it is 122 // the first row in the row group. 123 for offset := 0; offset < N-l; offset += 2 * l { 124 k++ 125 zeta := int32(Zetas[k]) 126 127 // j loops over each butterfly in the row group. 128 for j := offset; j < offset+l; j++ { 129 t := montReduce(zeta * int32(p[j+l])) 130 p[j+l] = p[j] - t 131 p[j] += t 132 } 133 } 134 } 135 } 136 137 // Executes an in-place inverse "NTT" on p and multiply by the Montgomery 138 // factor R. 139 // 140 // Requires coefficients to be in "tangled" order, see Tangle(). 141 // Assumes the coefficients are in absolute value ≤q. The resulting 142 // coefficients are in absolute value ≤q. If the input is in Montgomery 143 // form, then the result is in Montgomery form and so (by linearity) 144 // if the input is in regular form, then the result is also in regular form. 145 func (p *Poly) invNTTGeneric() { 146 k := 127 // Index into Zetas 147 r := -1 // Index into InvNTTReductions. 148 149 // We basically do the opposite of NTT, but postpone dividing by 2 in the 150 // inverse of the Cooley-Tukey butterfly and accumulate that into a big 151 // division by 2⁷ at the end. See the comments in the NTT() function. 152 153 for l := 2; l < N; l <<= 1 { 154 for offset := 0; offset < N-l; offset += 2 * l { 155 // As we're inverting, we need powers of ζ⁻¹ (instead of ζ). 156 // To be precise, we need ζᵇʳᵛ⁽ᵏ⁾⁻¹²⁸. However, as ζ⁻¹²⁸ = -1, 157 // we can use the existing Zetas table instead of 158 // keeping a separate InvZetas table as in Dilithium. 159 160 minZeta := int32(Zetas[k]) 161 k-- 162 163 for j := offset; j < offset+l; j++ { 164 // Gentleman-Sande butterfly: (a, b) ↦ (a + b, ζ(a-b)) 165 t := p[j+l] - p[j] 166 p[j] += p[j+l] 167 p[j+l] = montReduce(minZeta * int32(t)) 168 169 // Note that if we had |a| < αq and |b| < βq before the 170 // butterfly, then now we have |a| < (α+β)q and |b| < q. 171 } 172 } 173 174 // We let the InvNTTReductions instruct us which coefficients to 175 // Barrett reduce. See TestInvNTTReductions, which tests whether 176 // there is an overflow. 177 for { 178 r++ 179 i := InvNTTReductions[r] 180 if i < 0 { 181 break 182 } 183 p[i] = barrettReduce(p[i]) 184 } 185 } 186 187 for j := 0; j < N; j++ { 188 // Note 1441 = (128)⁻¹ R². The coefficients are bounded by 9q, so 189 // as 1441 * 9 ≈ 2¹⁴ < 2¹⁵, we're within the required bounds 190 // for montReduce(). 191 p[j] = montReduce(1441 * int32(p[j])) 192 } 193 }