github.com/cloudflare/circl@v1.5.0/sign/dilithium/mode2/internal/sample.go (about) 1 // Code generated from mode3/internal/sample.go by gen.go 2 3 package internal 4 5 import ( 6 "encoding/binary" 7 8 "github.com/cloudflare/circl/internal/sha3" 9 common "github.com/cloudflare/circl/sign/internal/dilithium" 10 "github.com/cloudflare/circl/simd/keccakf1600" 11 ) 12 13 // DeriveX4Available indicates whether the system supports the quick fourway 14 // sampling variants like PolyDeriveUniformX4. 15 var DeriveX4Available = keccakf1600.IsEnabledX4() 16 17 // For each i, sample ps[i] uniformly from the given seed and nonces[i]. 18 // ps[i] may be nil and is ignored in that case. 19 // 20 // Can only be called when DeriveX4Available is true. 21 func PolyDeriveUniformX4(ps [4]*common.Poly, seed *[32]byte, nonces [4]uint16) { 22 var perm keccakf1600.StateX4 23 state := perm.Initialize(false) 24 25 // Absorb the seed in the four states 26 for i := 0; i < 4; i++ { 27 v := binary.LittleEndian.Uint64(seed[8*i : 8*(i+1)]) 28 for j := 0; j < 4; j++ { 29 state[i*4+j] = v 30 } 31 } 32 33 // Absorb the nonces, the SHAKE128 domain separator (0b1111), the 34 // start of the padding (0b...001) and the end of the padding 0b100... 35 // Recall that the rate of SHAKE128 is 168 --- i.e. 21 uint64s. 36 for j := 0; j < 4; j++ { 37 state[4*4+j] = uint64(nonces[j]) | (0x1f << 16) 38 state[20*4+j] = 0x80 << 56 39 } 40 41 var idx [4]int // indices into ps 42 for j := 0; j < 4; j++ { 43 if ps[j] == nil { 44 idx[j] = common.N // mark nil polynomial as completed 45 } 46 } 47 48 done := false 49 for !done { 50 // Applies KeccaK-f[1600] to state to get the next 21 uint64s of each 51 // of the four SHAKE128 streams. 52 perm.Permute() 53 54 done = true 55 56 PolyLoop: 57 for j := 0; j < 4; j++ { 58 if idx[j] == common.N { 59 continue 60 } 61 for i := 0; i < 7; i++ { 62 var t [8]uint32 63 t[0] = uint32(state[i*3*4+j] & 0x7fffff) 64 t[1] = uint32((state[i*3*4+j] >> 24) & 0x7fffff) 65 t[2] = uint32((state[i*3*4+j] >> 48) | 66 ((state[(i*3+1)*4+j] & 0x7f) << 16)) 67 t[3] = uint32((state[(i*3+1)*4+j] >> 8) & 0x7fffff) 68 t[4] = uint32((state[(i*3+1)*4+j] >> 32) & 0x7fffff) 69 t[5] = uint32((state[(i*3+1)*4+j] >> 56) | 70 ((state[(i*3+2)*4+j] & 0x7fff) << 8)) 71 t[6] = uint32((state[(i*3+2)*4+j] >> 16) & 0x7fffff) 72 t[7] = uint32((state[(i*3+2)*4+j] >> 40) & 0x7fffff) 73 74 for k := 0; k < 8; k++ { 75 if t[k] < common.Q { 76 ps[j][idx[j]] = t[k] 77 idx[j]++ 78 if idx[j] == common.N { 79 continue PolyLoop 80 } 81 } 82 } 83 } 84 done = false 85 } 86 } 87 } 88 89 // Sample p uniformly from the given seed and nonce. 90 // 91 // p will be normalized. 92 func PolyDeriveUniform(p *common.Poly, seed *[32]byte, nonce uint16) { 93 var i, length int 94 var buf [12 * 16]byte // fits 168B SHAKE-128 rate 95 96 length = 168 97 98 sample := func() { 99 // Note that 3 divides into 168 and 12*16, so we use up buf completely. 100 for j := 0; j < length && i < common.N; j += 3 { 101 t := (uint32(buf[j]) | (uint32(buf[j+1]) << 8) | 102 (uint32(buf[j+2]) << 16)) & 0x7fffff 103 104 // We use rejection sampling 105 if t < common.Q { 106 p[i] = t 107 i++ 108 } 109 } 110 } 111 112 var iv [32 + 2]byte // 32 byte seed + uint16 nonce 113 h := sha3.NewShake128() 114 copy(iv[:32], seed[:]) 115 iv[32] = uint8(nonce) 116 iv[33] = uint8(nonce >> 8) 117 _, _ = h.Write(iv[:]) 118 119 for i < common.N { 120 _, _ = h.Read(buf[:168]) 121 sample() 122 } 123 } 124 125 // Sample p uniformly with coefficients of norm less than or equal η, 126 // using the given seed and nonce. 127 // 128 // p will not be normalized, but will have coefficients in [q-η,q+η]. 129 func PolyDeriveUniformLeqEta(p *common.Poly, seed *[64]byte, nonce uint16) { 130 // Assumes 2 < η < 8. 131 var i, length int 132 var buf [9 * 16]byte // fits 136B SHAKE-256 rate 133 134 length = 136 135 136 sample := func() { 137 // We use rejection sampling 138 for j := 0; j < length && i < common.N; j++ { 139 t1 := uint32(buf[j]) & 15 140 t2 := uint32(buf[j]) >> 4 141 if Eta == 2 { // branch is eliminated by compiler 142 if t1 <= 14 { 143 t1 -= ((205 * t1) >> 10) * 5 // reduce mod 5 144 p[i] = common.Q + Eta - t1 145 i++ 146 } 147 if t2 <= 14 && i < common.N { 148 t2 -= ((205 * t2) >> 10) * 5 // reduce mod 5 149 p[i] = common.Q + Eta - t2 150 i++ 151 } 152 } else if Eta == 4 { 153 if t1 <= 2*Eta { 154 p[i] = common.Q + Eta - t1 155 i++ 156 } 157 if t2 <= 2*Eta && i < common.N { 158 p[i] = common.Q + Eta - t2 159 i++ 160 } 161 } else { 162 panic("unsupported η") 163 } 164 } 165 } 166 167 var iv [64 + 2]byte // 64 byte seed + uint16 nonce 168 169 h := sha3.NewShake256() 170 copy(iv[:64], seed[:]) 171 iv[64] = uint8(nonce) 172 iv[65] = uint8(nonce >> 8) 173 174 // 136 is SHAKE-256 rate 175 _, _ = h.Write(iv[:]) 176 177 for i < common.N { 178 _, _ = h.Read(buf[:136]) 179 sample() 180 } 181 } 182 183 // Sample v[i] uniformly with coefficients in (-γ₁,…,γ₁] using the 184 // given seed and nonce+i 185 // 186 // p will be normalized. 187 func VecLDeriveUniformLeGamma1(v *VecL, seed *[64]byte, nonce uint16) { 188 for i := 0; i < L; i++ { 189 PolyDeriveUniformLeGamma1(&v[i], seed, nonce+uint16(i)) 190 } 191 } 192 193 // Sample p uniformly with coefficients in (-γ₁,…,γK1s] using the 194 // given seed and nonce. 195 // 196 // p will be normalized. 197 func PolyDeriveUniformLeGamma1(p *common.Poly, seed *[64]byte, nonce uint16) { 198 var buf [PolyLeGamma1Size]byte 199 200 var iv [66]byte 201 h := sha3.NewShake256() 202 copy(iv[:64], seed[:]) 203 iv[64] = uint8(nonce) 204 iv[65] = uint8(nonce >> 8) 205 _, _ = h.Write(iv[:]) 206 _, _ = h.Read(buf[:]) 207 208 PolyUnpackLeGamma1(p, buf[:]) 209 } 210 211 // For each i, sample ps[i] uniformly with τ non-zero coefficients in {q-1,1} 212 // using the given seed and w1[i]. ps[i] may be nil and is ignored 213 // in that case. ps[i] will be normalized. 214 // 215 // Can only be called when DeriveX4Available is true. 216 // 217 // This function is currently not used (yet). 218 func PolyDeriveUniformBallX4(ps [4]*common.Poly, seed []byte) { 219 var perm keccakf1600.StateX4 220 state := perm.Initialize(false) 221 222 // Absorb the seed in the four states 223 for i := 0; i < CTildeSize/8; i++ { 224 v := binary.LittleEndian.Uint64(seed[8*i : 8*(i+1)]) 225 for j := 0; j < 4; j++ { 226 state[i*4+j] = v 227 } 228 } 229 230 // SHAKE256 domain separator and padding 231 for j := 0; j < 4; j++ { 232 state[(CTildeSize/8)*4+j] ^= 0x1f 233 state[16*4+j] ^= 0x80 << 56 234 } 235 perm.Permute() 236 237 var signs [4]uint64 238 var idx [4]uint16 // indices into ps 239 240 for j := 0; j < 4; j++ { 241 if ps[j] != nil { 242 signs[j] = state[j] 243 *ps[j] = common.Poly{} // zero ps[j] 244 idx[j] = common.N - Tau 245 } else { 246 idx[j] = common.N // mark as completed 247 } 248 } 249 250 stateOffset := 1 251 for { 252 done := true 253 254 PolyLoop: 255 for j := 0; j < 4; j++ { 256 if idx[j] == common.N { 257 continue 258 } 259 260 for i := stateOffset; i < 17; i++ { 261 var bs [8]byte 262 binary.LittleEndian.PutUint64(bs[:], state[4*i+j]) 263 for k := 0; k < 8; k++ { 264 b := uint16(bs[k]) 265 266 if b > idx[j] { 267 continue 268 } 269 270 ps[j][idx[j]] = ps[j][b] 271 ps[j][b] = 1 272 // Takes least significant bit of signs and uses it for the sign. 273 // Note 1 ^ (1 | (Q-1)) = Q-1. 274 ps[j][b] ^= uint32((-(signs[j] & 1)) & (1 | (common.Q - 1))) 275 signs[j] >>= 1 276 277 idx[j]++ 278 if idx[j] == common.N { 279 continue PolyLoop 280 } 281 } 282 } 283 284 done = false 285 } 286 287 if done { 288 break 289 } 290 291 perm.Permute() 292 stateOffset = 0 293 } 294 } 295 296 // Samples p uniformly with τ non-zero coefficients in {q-1,1}. 297 // 298 // The polynomial p will be normalized. 299 func PolyDeriveUniformBall(p *common.Poly, seed []byte) { 300 var buf [136]byte // SHAKE-256 rate is 136 301 302 h := sha3.NewShake256() 303 _, _ = h.Write(seed[:]) 304 _, _ = h.Read(buf[:]) 305 306 // Essentially we generate a sequence of τ ones or minus ones, 307 // prepend 196 zeroes and shuffle the concatenation using the 308 // usual algorithm (Fisher--Yates.) 309 signs := binary.LittleEndian.Uint64(buf[:]) 310 bufOff := 8 // offset into buf 311 312 *p = common.Poly{} // zero p 313 for i := uint16(common.N - Tau); i < common.N; i++ { 314 var b uint16 315 316 // Find location of where to move the new coefficient to using 317 // rejection sampling. 318 for { 319 if bufOff >= 136 { 320 _, _ = h.Read(buf[:]) 321 bufOff = 0 322 } 323 324 b = uint16(buf[bufOff]) 325 bufOff++ 326 327 if b <= i { 328 break 329 } 330 } 331 332 p[i] = p[b] 333 p[b] = 1 334 // Takes least significant bit of signs and uses it for the sign. 335 // Note 1 ^ (1 | (Q-1)) = Q-1. 336 p[b] ^= uint32((-(signs & 1)) & (1 | (common.Q - 1))) 337 signs >>= 1 338 } 339 }