github.com/cloudflare/circl@v1.5.0/sign/dilithium/mode3/internal/sample.go (about)

     1  package internal
     2  
     3  import (
     4  	"encoding/binary"
     5  
     6  	"github.com/cloudflare/circl/internal/sha3"
     7  	common "github.com/cloudflare/circl/sign/internal/dilithium"
     8  	"github.com/cloudflare/circl/simd/keccakf1600"
     9  )
    10  
// DeriveX4Available indicates whether the system supports the quick fourway
// sampling variants like PolyDeriveUniformX4.
//
// It is assigned once, during package initialization, from
// keccakf1600.IsEnabledX4().
var DeriveX4Available = keccakf1600.IsEnabledX4()
    14  
    15  // For each i, sample ps[i] uniformly from the given seed and nonces[i].
    16  // ps[i] may be nil and is ignored in that case.
    17  //
    18  // Can only be called when DeriveX4Available is true.
    19  func PolyDeriveUniformX4(ps [4]*common.Poly, seed *[32]byte, nonces [4]uint16) {
    20  	var perm keccakf1600.StateX4
    21  	state := perm.Initialize(false)
    22  
    23  	// Absorb the seed in the four states
    24  	for i := 0; i < 4; i++ {
    25  		v := binary.LittleEndian.Uint64(seed[8*i : 8*(i+1)])
    26  		for j := 0; j < 4; j++ {
    27  			state[i*4+j] = v
    28  		}
    29  	}
    30  
    31  	// Absorb the nonces, the SHAKE128 domain separator (0b1111), the
    32  	// start of the padding (0b...001) and the end of the padding 0b100...
    33  	// Recall that the rate of SHAKE128 is 168 --- i.e. 21 uint64s.
    34  	for j := 0; j < 4; j++ {
    35  		state[4*4+j] = uint64(nonces[j]) | (0x1f << 16)
    36  		state[20*4+j] = 0x80 << 56
    37  	}
    38  
    39  	var idx [4]int // indices into ps
    40  	for j := 0; j < 4; j++ {
    41  		if ps[j] == nil {
    42  			idx[j] = common.N // mark nil polynomial as completed
    43  		}
    44  	}
    45  
    46  	done := false
    47  	for !done {
    48  		// Applies KeccaK-f[1600] to state to get the next 21 uint64s of each
    49  		// of the four SHAKE128 streams.
    50  		perm.Permute()
    51  
    52  		done = true
    53  
    54  	PolyLoop:
    55  		for j := 0; j < 4; j++ {
    56  			if idx[j] == common.N {
    57  				continue
    58  			}
    59  			for i := 0; i < 7; i++ {
    60  				var t [8]uint32
    61  				t[0] = uint32(state[i*3*4+j] & 0x7fffff)
    62  				t[1] = uint32((state[i*3*4+j] >> 24) & 0x7fffff)
    63  				t[2] = uint32((state[i*3*4+j] >> 48) |
    64  					((state[(i*3+1)*4+j] & 0x7f) << 16))
    65  				t[3] = uint32((state[(i*3+1)*4+j] >> 8) & 0x7fffff)
    66  				t[4] = uint32((state[(i*3+1)*4+j] >> 32) & 0x7fffff)
    67  				t[5] = uint32((state[(i*3+1)*4+j] >> 56) |
    68  					((state[(i*3+2)*4+j] & 0x7fff) << 8))
    69  				t[6] = uint32((state[(i*3+2)*4+j] >> 16) & 0x7fffff)
    70  				t[7] = uint32((state[(i*3+2)*4+j] >> 40) & 0x7fffff)
    71  
    72  				for k := 0; k < 8; k++ {
    73  					if t[k] < common.Q {
    74  						ps[j][idx[j]] = t[k]
    75  						idx[j]++
    76  						if idx[j] == common.N {
    77  							continue PolyLoop
    78  						}
    79  					}
    80  				}
    81  			}
    82  			done = false
    83  		}
    84  	}
    85  }
    86  
    87  // Sample p uniformly from the given seed and nonce.
    88  //
    89  // p will be normalized.
    90  func PolyDeriveUniform(p *common.Poly, seed *[32]byte, nonce uint16) {
    91  	var i, length int
    92  	var buf [12 * 16]byte // fits 168B SHAKE-128 rate
    93  
    94  	length = 168
    95  
    96  	sample := func() {
    97  		// Note that 3 divides into 168 and 12*16, so we use up buf completely.
    98  		for j := 0; j < length && i < common.N; j += 3 {
    99  			t := (uint32(buf[j]) | (uint32(buf[j+1]) << 8) |
   100  				(uint32(buf[j+2]) << 16)) & 0x7fffff
   101  
   102  			// We use rejection sampling
   103  			if t < common.Q {
   104  				p[i] = t
   105  				i++
   106  			}
   107  		}
   108  	}
   109  
   110  	var iv [32 + 2]byte // 32 byte seed + uint16 nonce
   111  	h := sha3.NewShake128()
   112  	copy(iv[:32], seed[:])
   113  	iv[32] = uint8(nonce)
   114  	iv[33] = uint8(nonce >> 8)
   115  	_, _ = h.Write(iv[:])
   116  
   117  	for i < common.N {
   118  		_, _ = h.Read(buf[:168])
   119  		sample()
   120  	}
   121  }
   122  
   123  // Sample p uniformly with coefficients of norm less than or equal η,
   124  // using the given seed and nonce.
   125  //
   126  // p will not be normalized, but will have coefficients in [q-η,q+η].
   127  func PolyDeriveUniformLeqEta(p *common.Poly, seed *[64]byte, nonce uint16) {
   128  	// Assumes 2 < η < 8.
   129  	var i, length int
   130  	var buf [9 * 16]byte // fits 136B SHAKE-256 rate
   131  
   132  	length = 136
   133  
   134  	sample := func() {
   135  		// We use rejection sampling
   136  		for j := 0; j < length && i < common.N; j++ {
   137  			t1 := uint32(buf[j]) & 15
   138  			t2 := uint32(buf[j]) >> 4
   139  			if Eta == 2 { // branch is eliminated by compiler
   140  				if t1 <= 14 {
   141  					t1 -= ((205 * t1) >> 10) * 5 // reduce mod  5
   142  					p[i] = common.Q + Eta - t1
   143  					i++
   144  				}
   145  				if t2 <= 14 && i < common.N {
   146  					t2 -= ((205 * t2) >> 10) * 5 // reduce mod 5
   147  					p[i] = common.Q + Eta - t2
   148  					i++
   149  				}
   150  			} else if Eta == 4 {
   151  				if t1 <= 2*Eta {
   152  					p[i] = common.Q + Eta - t1
   153  					i++
   154  				}
   155  				if t2 <= 2*Eta && i < common.N {
   156  					p[i] = common.Q + Eta - t2
   157  					i++
   158  				}
   159  			} else {
   160  				panic("unsupported η")
   161  			}
   162  		}
   163  	}
   164  
   165  	var iv [64 + 2]byte // 64 byte seed + uint16 nonce
   166  
   167  	h := sha3.NewShake256()
   168  	copy(iv[:64], seed[:])
   169  	iv[64] = uint8(nonce)
   170  	iv[65] = uint8(nonce >> 8)
   171  
   172  	// 136 is SHAKE-256 rate
   173  	_, _ = h.Write(iv[:])
   174  
   175  	for i < common.N {
   176  		_, _ = h.Read(buf[:136])
   177  		sample()
   178  	}
   179  }
   180  
   181  // Sample v[i] uniformly with coefficients in (-γ₁,…,γ₁]  using the
   182  // given seed and nonce+i
   183  //
   184  // p will be normalized.
   185  func VecLDeriveUniformLeGamma1(v *VecL, seed *[64]byte, nonce uint16) {
   186  	for i := 0; i < L; i++ {
   187  		PolyDeriveUniformLeGamma1(&v[i], seed, nonce+uint16(i))
   188  	}
   189  }
   190  
   191  // Sample p uniformly with coefficients in (-γ₁,…,γK1s] using the
   192  // given seed and nonce.
   193  //
   194  // p will be normalized.
   195  func PolyDeriveUniformLeGamma1(p *common.Poly, seed *[64]byte, nonce uint16) {
   196  	var buf [PolyLeGamma1Size]byte
   197  
   198  	var iv [66]byte
   199  	h := sha3.NewShake256()
   200  	copy(iv[:64], seed[:])
   201  	iv[64] = uint8(nonce)
   202  	iv[65] = uint8(nonce >> 8)
   203  	_, _ = h.Write(iv[:])
   204  	_, _ = h.Read(buf[:])
   205  
   206  	PolyUnpackLeGamma1(p, buf[:])
   207  }
   208  
   209  // For each i, sample ps[i] uniformly with τ non-zero coefficients in {q-1,1}
   210  // using the given seed and w1[i].  ps[i] may be nil and is ignored
   211  // in that case.  ps[i] will be normalized.
   212  //
   213  // Can only be called when DeriveX4Available is true.
   214  //
   215  // This function is currently not used (yet).
   216  func PolyDeriveUniformBallX4(ps [4]*common.Poly, seed []byte) {
   217  	var perm keccakf1600.StateX4
   218  	state := perm.Initialize(false)
   219  
   220  	// Absorb the seed in the four states
   221  	for i := 0; i < CTildeSize/8; i++ {
   222  		v := binary.LittleEndian.Uint64(seed[8*i : 8*(i+1)])
   223  		for j := 0; j < 4; j++ {
   224  			state[i*4+j] = v
   225  		}
   226  	}
   227  
   228  	// SHAKE256 domain separator and padding
   229  	for j := 0; j < 4; j++ {
   230  		state[(CTildeSize/8)*4+j] ^= 0x1f
   231  		state[16*4+j] ^= 0x80 << 56
   232  	}
   233  	perm.Permute()
   234  
   235  	var signs [4]uint64
   236  	var idx [4]uint16 // indices into ps
   237  
   238  	for j := 0; j < 4; j++ {
   239  		if ps[j] != nil {
   240  			signs[j] = state[j]
   241  			*ps[j] = common.Poly{} // zero ps[j]
   242  			idx[j] = common.N - Tau
   243  		} else {
   244  			idx[j] = common.N // mark as completed
   245  		}
   246  	}
   247  
   248  	stateOffset := 1
   249  	for {
   250  		done := true
   251  
   252  	PolyLoop:
   253  		for j := 0; j < 4; j++ {
   254  			if idx[j] == common.N {
   255  				continue
   256  			}
   257  
   258  			for i := stateOffset; i < 17; i++ {
   259  				var bs [8]byte
   260  				binary.LittleEndian.PutUint64(bs[:], state[4*i+j])
   261  				for k := 0; k < 8; k++ {
   262  					b := uint16(bs[k])
   263  
   264  					if b > idx[j] {
   265  						continue
   266  					}
   267  
   268  					ps[j][idx[j]] = ps[j][b]
   269  					ps[j][b] = 1
   270  					// Takes least significant bit of signs and uses it for the sign.
   271  					// Note 1 ^ (1 | (Q-1)) = Q-1.
   272  					ps[j][b] ^= uint32((-(signs[j] & 1)) & (1 | (common.Q - 1)))
   273  					signs[j] >>= 1
   274  
   275  					idx[j]++
   276  					if idx[j] == common.N {
   277  						continue PolyLoop
   278  					}
   279  				}
   280  			}
   281  
   282  			done = false
   283  		}
   284  
   285  		if done {
   286  			break
   287  		}
   288  
   289  		perm.Permute()
   290  		stateOffset = 0
   291  	}
   292  }
   293  
   294  // Samples p uniformly with τ non-zero coefficients in {q-1,1}.
   295  //
   296  // The polynomial p will be normalized.
   297  func PolyDeriveUniformBall(p *common.Poly, seed []byte) {
   298  	var buf [136]byte // SHAKE-256 rate is 136
   299  
   300  	h := sha3.NewShake256()
   301  	_, _ = h.Write(seed[:])
   302  	_, _ = h.Read(buf[:])
   303  
   304  	// Essentially we generate a sequence of τ ones or minus ones,
   305  	// prepend 196 zeroes and shuffle the concatenation using the
   306  	// usual algorithm (Fisher--Yates.)
   307  	signs := binary.LittleEndian.Uint64(buf[:])
   308  	bufOff := 8 // offset into buf
   309  
   310  	*p = common.Poly{} // zero p
   311  	for i := uint16(common.N - Tau); i < common.N; i++ {
   312  		var b uint16
   313  
   314  		// Find location of where to move the new coefficient to using
   315  		// rejection sampling.
   316  		for {
   317  			if bufOff >= 136 {
   318  				_, _ = h.Read(buf[:])
   319  				bufOff = 0
   320  			}
   321  
   322  			b = uint16(buf[bufOff])
   323  			bufOff++
   324  
   325  			if b <= i {
   326  				break
   327  			}
   328  		}
   329  
   330  		p[i] = p[b]
   331  		p[b] = 1
   332  		// Takes least significant bit of signs and uses it for the sign.
   333  		// Note 1 ^ (1 | (Q-1)) = Q-1.
   334  		p[b] ^= uint32((-(signs & 1)) & (1 | (common.Q - 1)))
   335  		signs >>= 1
   336  	}
   337  }