github.com/cloudflare/circl@v1.5.0/pke/kyber/internal/common/ntt.go

github.com/cloudflare/circl@v1.5.0/pke/kyber/internal/common/ntt.go (about)

     1  package common
     2  
     3  // Zetas lists precomputed powers of the primitive root of unity in
     4  // Montgomery representation used for the NTT:
     5  //
     6  //	Zetas[i] = ζᵇʳᵛ⁽ⁱ⁾ R mod q
     7  //
     8  // where ζ = 17, brv(i) is the bitreversal of a 7-bit number and R=2¹⁶ mod q.
     9  //
    10  // The following Python code generates the Zetas arrays:
    11  //
    12  //	q = 13*2**8 + 1; zeta = 17
    13  //	R = 2**16 % q # Montgomery const.
    14  //	def brv(x): return int(''.join(reversed(bin(x)[2:].zfill(7))),2)
    15  //	print([(pow(zeta, brv(i), q)*R)%q for i in range(128)])
    16  var Zetas = [128]int16{
    17  	2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182,
    18  	962, 2127, 1855, 1468, 573, 2004, 264, 383, 2500, 1458, 1727, 3199,
    19  	2648, 1017, 732, 608, 1787, 411, 3124, 1758, 1223, 652, 2777, 1015,
    20  	2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126,
    21  	1469, 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821,
    22  	2604, 448, 2264, 677, 2054, 2226, 430, 555, 843, 2078, 871, 1550,
    23  	105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, 3083, 778, 1159,
    24  	3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173,
    25  	3254, 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218,
    26  	1994, 2455, 220, 2142, 1670, 2144, 1799, 2051, 794, 1819, 2475,
    27  	2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628,
    28  }
    29  
    30  // InvNTTReductions keeps track of which coefficients to apply Barrett
    31  // reduction to in Poly.InvNTT().
    32  //
    33  // Generated in a lazily: once a butterfly is computed which is about to
    34  // overflow the int16, the largest coefficient is reduced.  If that is
    35  // not enough, the other coefficient is reduced as well.
    36  //
    37  // This is actually optimal, as proven in https://eprint.iacr.org/2020/1377.pdf
    38  var InvNTTReductions = [...]int{
    39  	-1, // after layer 1
    40  	-1, // after layer 2
    41  	16, 17, 48, 49, 80, 81, 112, 113, 144, 145, 176, 177, 208, 209, 240,
    42  	241, -1, // after layer 3
    43  	0, 1, 32, 33, 34, 35, 64, 65, 96, 97, 98, 99, 128, 129, 160, 161, 162, 163,
    44  	192, 193, 224, 225, 226, 227, -1, // after layer 4
    45  	2, 3, 66, 67, 68, 69, 70, 71, 130, 131, 194, 195, 196, 197, 198,
    46  	199, -1, // after layer 5
    47  	4, 5, 6, 7, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
    48  	143, -1, // after layer 6
    49  	-1, //  after layer 7
    50  }
    51  
    52  // Executes an in-place forward "NTT" on p.
    53  //
    54  // Assumes the coefficients are in absolute value ≤q.  The resulting
    55  // coefficients are in absolute value ≤7q.  If the input is in Montgomery
    56  // form, then the result is in Montgomery form and so (by linearity of the NTT)
    57  // if the input is in regular form, then the result is also in regular form.
    58  // The order of coefficients will be "tangled". These can be put back into
    59  // their proper order by calling Detangle().
    60  func (p *Poly) nttGeneric() {
    61  	// Note that ℤ_q does not have a primitive 512ᵗʰ root of unity (as 512
    62  	// does not divide into q-1) and so we cannot do a regular NTT.  ℤ_q
    63  	// does have a primitive 256ᵗʰ root of unity, the smallest of which
    64  	// is ζ := 17.
    65  	//
    66  	// Recall that our base ring R := ℤ_q[x] / (x²⁵⁶ + 1).  The polynomial
    67  	// x²⁵⁶+1 will not split completely (as its roots would be 512ᵗʰ roots
    68  	// of unity.)  However, it does split almost (using ζ¹²⁸ = -1):
    69  	//
    70  	// x²⁵⁶ + 1 = (x²)¹²⁸ - ζ¹²⁸
    71  	//          = ((x²)⁶⁴ - ζ⁶⁴)((x²)⁶⁴ + ζ⁶⁴)
    72  	//          = ((x²)³² - ζ³²)((x²)³² + ζ³²)((x²)³² - ζ⁹⁶)((x²)³² + ζ⁹⁶)
    73  	//          ⋮
    74  	//          = (x² - ζ)(x² + ζ)(x² - ζ⁶⁵)(x² + ζ⁶⁵) … (x² + ζ¹²⁷)
    75  	//
    76  	// Note that the powers of ζ that appear (from the second line down) are
    77  	// in binary
    78  	//
    79  	// 0100000 1100000
    80  	// 0010000 1010000 0110000 1110000
    81  	// 0001000 1001000 0101000 1101000 0011000 1011000 0111000 1111000
    82  	//         …
    83  	//
    84  	// That is: brv(2), brv(3), brv(4), …, where brv(x) denotes the 7-bit
    85  	// bitreversal of x.  These powers of ζ are given by the Zetas array.
    86  	//
    87  	// The polynomials x² ± ζⁱ are irreducible and coprime, hence by
    88  	// the Chinese Remainder Theorem we know
    89  	//
    90  	//  ℤ_q[x]/(x²⁵⁶+1) → ℤ_q[x]/(x²-ζ) x … x  ℤ_q[x]/(x²+ζ¹²⁷)
    91  	//
    92  	// given by a ↦ ( a mod x²-ζ, …, a mod x²+ζ¹²⁷ )
    93  	// is an isomorphism, which is the "NTT".  It can be efficiently computed by
    94  	//
    95  	//
    96  	//  a ↦ ( a mod (x²)⁶⁴ - ζ⁶⁴, a mod (x²)⁶⁴ + ζ⁶⁴ )
    97  	//    ↦ ( a mod (x²)³² - ζ³², a mod (x²)³² + ζ³²,
    98  	//        a mod (x²)⁹⁶ - ζ⁹⁶, a mod (x²)⁹⁶ + ζ⁹⁶ )
    99  	//
   100  	//	    et cetera
   101  	//
   102  	// If N was 8 then this can be pictured in the following diagram:
   103  	//
   104  	//  https://cnx.org/resources/17ee4dfe517a6adda05377b25a00bf6e6c93c334/File0026.png
   105  	//
   106  	// Each cross is a Cooley-Tukey butterfly: it's the map
   107  	//
   108  	//  (a, b) ↦ (a + ζb, a - ζb)
   109  	//
   110  	// for the appropriate power ζ for that column and row group.
   111  
   112  	k := 0 // Index into Zetas
   113  
   114  	// l runs effectively over the columns in the diagram above; it is half the
   115  	// height of a row group, i.e. the number of butterflies in each row group.
   116  	// In the diagram above it would be 4, 2, 1.
   117  	for l := N / 2; l > 1; l >>= 1 {
   118  		// On the nᵗʰ iteration of the l-loop, the absolute value of the
   119  		// coefficients are bounded by nq.
   120  
   121  		// offset effectively loops over the row groups in this column; it is
   122  		// the first row in the row group.
   123  		for offset := 0; offset < N-l; offset += 2 * l {
   124  			k++
   125  			zeta := int32(Zetas[k])
   126  
   127  			// j loops over each butterfly in the row group.
   128  			for j := offset; j < offset+l; j++ {
   129  				t := montReduce(zeta * int32(p[j+l]))
   130  				p[j+l] = p[j] - t
   131  				p[j] += t
   132  			}
   133  		}
   134  	}
   135  }
   136  
   137  // Executes an in-place inverse "NTT" on p and multiply by the Montgomery
   138  // factor R.
   139  //
   140  // Requires coefficients to be in "tangled" order, see Tangle().
   141  // Assumes the coefficients are in absolute value ≤q.  The resulting
   142  // coefficients are in absolute value ≤q.  If the input is in Montgomery
   143  // form, then the result is in Montgomery form and so (by linearity)
   144  // if the input is in regular form, then the result is also in regular form.
   145  func (p *Poly) invNTTGeneric() {
   146  	k := 127 // Index into Zetas
   147  	r := -1  // Index into InvNTTReductions.
   148  
   149  	// We basically do the opposite of NTT, but postpone dividing by 2 in the
   150  	// inverse of the Cooley-Tukey butterfly and accumulate that into a big
   151  	// division by 2⁷ at the end.  See the comments in the NTT() function.
   152  
   153  	for l := 2; l < N; l <<= 1 {
   154  		for offset := 0; offset < N-l; offset += 2 * l {
   155  			// As we're inverting, we need powers of ζ⁻¹ (instead of ζ).
   156  			// To be precise, we need ζᵇʳᵛ⁽ᵏ⁾⁻¹²⁸. However, as ζ⁻¹²⁸ = -1,
   157  			// we can use the existing Zetas table instead of
   158  			// keeping a separate InvZetas table as in Dilithium.
   159  
   160  			minZeta := int32(Zetas[k])
   161  			k--
   162  
   163  			for j := offset; j < offset+l; j++ {
   164  				// Gentleman-Sande butterfly: (a, b) ↦ (a + b, ζ(a-b))
   165  				t := p[j+l] - p[j]
   166  				p[j] += p[j+l]
   167  				p[j+l] = montReduce(minZeta * int32(t))
   168  
   169  				// Note that if we had |a| < αq and |b| < βq before the
   170  				// butterfly, then now we have |a| < (α+β)q and |b| < q.
   171  			}
   172  		}
   173  
   174  		// We let the InvNTTReductions instruct us which coefficients to
   175  		// Barrett reduce.  See TestInvNTTReductions, which tests whether
   176  		// there is an overflow.
   177  		for {
   178  			r++
   179  			i := InvNTTReductions[r]
   180  			if i < 0 {
   181  				break
   182  			}
   183  			p[i] = barrettReduce(p[i])
   184  		}
   185  	}
   186  
   187  	for j := 0; j < N; j++ {
   188  		// Note 1441 = (128)⁻¹ R².  The coefficients are bounded by 9q, so
   189  		// as 1441 * 9 ≈ 2¹⁴ < 2¹⁵, we're within the required bounds
   190  		// for montReduce().
   191  		p[j] = montReduce(1441 * int32(p[j]))
   192  	}
   193  }