github.com/cloudflare/circl@v1.5.0/sign/dilithium/mode3/internal/sample.go (about) 1 package internal 2 3 import ( 4 "encoding/binary" 5 6 "github.com/cloudflare/circl/internal/sha3" 7 common "github.com/cloudflare/circl/sign/internal/dilithium" 8 "github.com/cloudflare/circl/simd/keccakf1600" 9 ) 10 11 // DeriveX4Available indicates whether the system supports the quick fourway 12 // sampling variants like PolyDeriveUniformX4. 13 var DeriveX4Available = keccakf1600.IsEnabledX4() 14 15 // For each i, sample ps[i] uniformly from the given seed and nonces[i]. 16 // ps[i] may be nil and is ignored in that case. 17 // 18 // Can only be called when DeriveX4Available is true. 19 func PolyDeriveUniformX4(ps [4]*common.Poly, seed *[32]byte, nonces [4]uint16) { 20 var perm keccakf1600.StateX4 21 state := perm.Initialize(false) 22 23 // Absorb the seed in the four states 24 for i := 0; i < 4; i++ { 25 v := binary.LittleEndian.Uint64(seed[8*i : 8*(i+1)]) 26 for j := 0; j < 4; j++ { 27 state[i*4+j] = v 28 } 29 } 30 31 // Absorb the nonces, the SHAKE128 domain separator (0b1111), the 32 // start of the padding (0b...001) and the end of the padding 0b100... 33 // Recall that the rate of SHAKE128 is 168 --- i.e. 21 uint64s. 34 for j := 0; j < 4; j++ { 35 state[4*4+j] = uint64(nonces[j]) | (0x1f << 16) 36 state[20*4+j] = 0x80 << 56 37 } 38 39 var idx [4]int // indices into ps 40 for j := 0; j < 4; j++ { 41 if ps[j] == nil { 42 idx[j] = common.N // mark nil polynomial as completed 43 } 44 } 45 46 done := false 47 for !done { 48 // Applies KeccaK-f[1600] to state to get the next 21 uint64s of each 49 // of the four SHAKE128 streams. 50 perm.Permute() 51 52 done = true 53 54 PolyLoop: 55 for j := 0; j < 4; j++ { 56 if idx[j] == common.N { 57 continue 58 } 59 for i := 0; i < 7; i++ { 60 var t [8]uint32 61 t[0] = uint32(state[i*3*4+j] & 0x7fffff) 62 t[1] = uint32((state[i*3*4+j] >> 24) & 0x7fffff) 63 t[2] = uint32((state[i*3*4+j] >> 48) | 64 ((state[(i*3+1)*4+j] & 0x7f) << 16)) 65 t[3] = uint32((state[(i*3+1)*4+j] >> 8) & 0x7fffff) 66 t[4] = uint32((state[(i*3+1)*4+j] >> 32) & 0x7fffff) 67 t[5] = uint32((state[(i*3+1)*4+j] >> 56) | 68 ((state[(i*3+2)*4+j] & 0x7fff) << 8)) 69 t[6] = uint32((state[(i*3+2)*4+j] >> 16) & 0x7fffff) 70 t[7] = uint32((state[(i*3+2)*4+j] >> 40) & 0x7fffff) 71 72 for k := 0; k < 8; k++ { 73 if t[k] < common.Q { 74 ps[j][idx[j]] = t[k] 75 idx[j]++ 76 if idx[j] == common.N { 77 continue PolyLoop 78 } 79 } 80 } 81 } 82 done = false 83 } 84 } 85 } 86 87 // Sample p uniformly from the given seed and nonce. 88 // 89 // p will be normalized. 90 func PolyDeriveUniform(p *common.Poly, seed *[32]byte, nonce uint16) { 91 var i, length int 92 var buf [12 * 16]byte // fits 168B SHAKE-128 rate 93 94 length = 168 95 96 sample := func() { 97 // Note that 3 divides into 168 and 12*16, so we use up buf completely. 98 for j := 0; j < length && i < common.N; j += 3 { 99 t := (uint32(buf[j]) | (uint32(buf[j+1]) << 8) | 100 (uint32(buf[j+2]) << 16)) & 0x7fffff 101 102 // We use rejection sampling 103 if t < common.Q { 104 p[i] = t 105 i++ 106 } 107 } 108 } 109 110 var iv [32 + 2]byte // 32 byte seed + uint16 nonce 111 h := sha3.NewShake128() 112 copy(iv[:32], seed[:]) 113 iv[32] = uint8(nonce) 114 iv[33] = uint8(nonce >> 8) 115 _, _ = h.Write(iv[:]) 116 117 for i < common.N { 118 _, _ = h.Read(buf[:168]) 119 sample() 120 } 121 } 122 123 // Sample p uniformly with coefficients of norm less than or equal η, 124 // using the given seed and nonce. 125 // 126 // p will not be normalized, but will have coefficients in [q-η,q+η]. 127 func PolyDeriveUniformLeqEta(p *common.Poly, seed *[64]byte, nonce uint16) { 128 // Assumes 2 < η < 8. 129 var i, length int 130 var buf [9 * 16]byte // fits 136B SHAKE-256 rate 131 132 length = 136 133 134 sample := func() { 135 // We use rejection sampling 136 for j := 0; j < length && i < common.N; j++ { 137 t1 := uint32(buf[j]) & 15 138 t2 := uint32(buf[j]) >> 4 139 if Eta == 2 { // branch is eliminated by compiler 140 if t1 <= 14 { 141 t1 -= ((205 * t1) >> 10) * 5 // reduce mod 5 142 p[i] = common.Q + Eta - t1 143 i++ 144 } 145 if t2 <= 14 && i < common.N { 146 t2 -= ((205 * t2) >> 10) * 5 // reduce mod 5 147 p[i] = common.Q + Eta - t2 148 i++ 149 } 150 } else if Eta == 4 { 151 if t1 <= 2*Eta { 152 p[i] = common.Q + Eta - t1 153 i++ 154 } 155 if t2 <= 2*Eta && i < common.N { 156 p[i] = common.Q + Eta - t2 157 i++ 158 } 159 } else { 160 panic("unsupported η") 161 } 162 } 163 } 164 165 var iv [64 + 2]byte // 64 byte seed + uint16 nonce 166 167 h := sha3.NewShake256() 168 copy(iv[:64], seed[:]) 169 iv[64] = uint8(nonce) 170 iv[65] = uint8(nonce >> 8) 171 172 // 136 is SHAKE-256 rate 173 _, _ = h.Write(iv[:]) 174 175 for i < common.N { 176 _, _ = h.Read(buf[:136]) 177 sample() 178 } 179 } 180 181 // Sample v[i] uniformly with coefficients in (-γ₁,…,γ₁] using the 182 // given seed and nonce+i 183 // 184 // p will be normalized. 185 func VecLDeriveUniformLeGamma1(v *VecL, seed *[64]byte, nonce uint16) { 186 for i := 0; i < L; i++ { 187 PolyDeriveUniformLeGamma1(&v[i], seed, nonce+uint16(i)) 188 } 189 } 190 191 // Sample p uniformly with coefficients in (-γ₁,…,γK1s] using the 192 // given seed and nonce. 193 // 194 // p will be normalized. 195 func PolyDeriveUniformLeGamma1(p *common.Poly, seed *[64]byte, nonce uint16) { 196 var buf [PolyLeGamma1Size]byte 197 198 var iv [66]byte 199 h := sha3.NewShake256() 200 copy(iv[:64], seed[:]) 201 iv[64] = uint8(nonce) 202 iv[65] = uint8(nonce >> 8) 203 _, _ = h.Write(iv[:]) 204 _, _ = h.Read(buf[:]) 205 206 PolyUnpackLeGamma1(p, buf[:]) 207 } 208 209 // For each i, sample ps[i] uniformly with τ non-zero coefficients in {q-1,1} 210 // using the given seed and w1[i]. ps[i] may be nil and is ignored 211 // in that case. ps[i] will be normalized. 212 // 213 // Can only be called when DeriveX4Available is true. 214 // 215 // This function is currently not used (yet). 216 func PolyDeriveUniformBallX4(ps [4]*common.Poly, seed []byte) { 217 var perm keccakf1600.StateX4 218 state := perm.Initialize(false) 219 220 // Absorb the seed in the four states 221 for i := 0; i < CTildeSize/8; i++ { 222 v := binary.LittleEndian.Uint64(seed[8*i : 8*(i+1)]) 223 for j := 0; j < 4; j++ { 224 state[i*4+j] = v 225 } 226 } 227 228 // SHAKE256 domain separator and padding 229 for j := 0; j < 4; j++ { 230 state[(CTildeSize/8)*4+j] ^= 0x1f 231 state[16*4+j] ^= 0x80 << 56 232 } 233 perm.Permute() 234 235 var signs [4]uint64 236 var idx [4]uint16 // indices into ps 237 238 for j := 0; j < 4; j++ { 239 if ps[j] != nil { 240 signs[j] = state[j] 241 *ps[j] = common.Poly{} // zero ps[j] 242 idx[j] = common.N - Tau 243 } else { 244 idx[j] = common.N // mark as completed 245 } 246 } 247 248 stateOffset := 1 249 for { 250 done := true 251 252 PolyLoop: 253 for j := 0; j < 4; j++ { 254 if idx[j] == common.N { 255 continue 256 } 257 258 for i := stateOffset; i < 17; i++ { 259 var bs [8]byte 260 binary.LittleEndian.PutUint64(bs[:], state[4*i+j]) 261 for k := 0; k < 8; k++ { 262 b := uint16(bs[k]) 263 264 if b > idx[j] { 265 continue 266 } 267 268 ps[j][idx[j]] = ps[j][b] 269 ps[j][b] = 1 270 // Takes least significant bit of signs and uses it for the sign. 271 // Note 1 ^ (1 | (Q-1)) = Q-1. 272 ps[j][b] ^= uint32((-(signs[j] & 1)) & (1 | (common.Q - 1))) 273 signs[j] >>= 1 274 275 idx[j]++ 276 if idx[j] == common.N { 277 continue PolyLoop 278 } 279 } 280 } 281 282 done = false 283 } 284 285 if done { 286 break 287 } 288 289 perm.Permute() 290 stateOffset = 0 291 } 292 } 293 294 // Samples p uniformly with τ non-zero coefficients in {q-1,1}. 295 // 296 // The polynomial p will be normalized. 297 func PolyDeriveUniformBall(p *common.Poly, seed []byte) { 298 var buf [136]byte // SHAKE-256 rate is 136 299 300 h := sha3.NewShake256() 301 _, _ = h.Write(seed[:]) 302 _, _ = h.Read(buf[:]) 303 304 // Essentially we generate a sequence of τ ones or minus ones, 305 // prepend 196 zeroes and shuffle the concatenation using the 306 // usual algorithm (Fisher--Yates.) 307 signs := binary.LittleEndian.Uint64(buf[:]) 308 bufOff := 8 // offset into buf 309 310 *p = common.Poly{} // zero p 311 for i := uint16(common.N - Tau); i < common.N; i++ { 312 var b uint16 313 314 // Find location of where to move the new coefficient to using 315 // rejection sampling. 316 for { 317 if bufOff >= 136 { 318 _, _ = h.Read(buf[:]) 319 bufOff = 0 320 } 321 322 b = uint16(buf[bufOff]) 323 bufOff++ 324 325 if b <= i { 326 break 327 } 328 } 329 330 p[i] = p[b] 331 p[b] = 1 332 // Takes least significant bit of signs and uses it for the sign. 333 // Note 1 ^ (1 | (Q-1)) = Q-1. 334 p[b] ^= uint32((-(signs & 1)) & (1 | (common.Q - 1))) 335 signs >>= 1 336 } 337 }