//go:build !amd64 || purego
// +build !amd64 purego

// Copyright 2020 ConsenSys Software Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Code generated by consensys/gnark-crypto DO NOT EDIT

package fp

import "math/bits"

// MulBy3 sets x to 3*x (mod q), in place.
//
// It is computed as 2*x + x using Double and Add.
func MulBy3(x *Element) {
	// keep a copy of the original value; x itself is overwritten by Double
	_x := *x
	x.Double(x).Add(x, &_x)
}

// MulBy5 sets x to 5*x (mod q), in place.
//
// It is computed as 2*(2*x) + x using Double (twice) and Add.
func MulBy5(x *Element) {
	// keep a copy of the original value; x itself is overwritten by Double
	_x := *x
	x.Double(x).Double(x).Add(x, &_x)
}

// MulBy13 sets x to 13*x (mod q), in place.
//
// It multiplies x by a hard-coded constant Element using Mul.
func MulBy13(x *Element) {
	// NOTE(review): presumably these limbs are the value 13 in the field's
	// internal (Montgomery) representation; verify against the generator.
	var y = Element{
		17338930599381248615,
		10169435867607475877,
		1410856163759197139,
		12105193723137614523,
		691221942076914011,
	}
	x.Mul(x, &y)
}

// Butterfly sets
//
//	a = a + b (mod q)
//	b = a - b (mod q)
//
// This build delegates to _butterflyGeneric.
// NOTE(review): presumably both right-hand sides use the values of a and b
// on entry; confirm against _butterflyGeneric.
func Butterfly(a, b *Element) {
	_butterflyGeneric(a, b)
}

// fromMont delegates to _fromMontGeneric.
// NOTE(review): presumably this converts z out of Montgomery form in place;
// confirm against _fromMontGeneric.
func fromMont(z *Element) {
	_fromMontGeneric(z)
}

// reduce delegates to _reduceGeneric.
// NOTE(review): presumably this brings z into the range [0, q) in place;
// confirm against _reduceGeneric.
func reduce(z *Element) {
	_reduceGeneric(z)
}

// Mul z = x * y (mod q)
//
// x and y must be less than q
//
// The result is stored in z and z is returned. The body is fully unrolled
// for the 5 words of an Element and uses only math/bits primitives.
func (z *Element) Mul(x, y *Element) *Element {

	// Implements CIOS multiplication -- section 2.3.2 of Tolga Acar's thesis
	// https://www.microsoft.com/en-us/research/wp-content/uploads/1998/06/97Acar.pdf
	//
	// The algorithm:
	//
	// for i=0 to N-1
	// 		C := 0
	// 		for j=0 to N-1
	// 			(C,t[j]) := t[j] + x[j]*y[i] + C
	// 		(t[N+1],t[N]) := t[N] + C
	//
	// 		C := 0
	// 		m := t[0]*q'[0] mod D
	// 		(C,_) := t[0] + m*q[0]
	// 		for j=1 to N-1
	// 			(C,t[j-1]) := t[j] + m*q[j] + C
	//
	// 		(C,t[N-1]) := t[N] + C
	// 		t[N] := t[N+1] + C
	//
	// → N is the number of machine words needed to store the modulus q
	// → D is the word size. For example, on a 64-bit architecture D is 2⁶⁴
	// → x[i], y[i], q[i] is the ith word of the numbers x,y,q
	// → q'[0] is the lowest word of the number -q⁻¹ mod r. This quantity is pre-computed, as it does not depend on the inputs.
	// → t is a temporary array of size N+2
	// → C, S are machine words. A pair (C,S) refers to (hi-bits, lo-bits) of a two-word number
	//
	// As described here https://hackmd.io/@gnark/modular_multiplication we can get rid of one carry chain and simplify:
	// (also described in https://eprint.iacr.org/2022/1400.pdf annex)
	//
	// for i=0 to N-1
	// 		(A,t[0]) := t[0] + x[0]*y[i]
	// 		m := t[0]*q'[0] mod D
	// 		C,_ := t[0] + m*q[0]
	// 		for j=1 to N-1
	// 			(A,t[j])  := t[j] + x[j]*y[i] + A
	// 			(C,t[j-1]) := t[j] + m*q[j] + C
	//
	// 		t[N-1] = C + A
	//
	// This optimization saves 5N + 2 additions in the algorithm, and can be used whenever the highest bit
	// of the modulus is zero (and not all of the remaining bits are set).
	//
	// In the unrolled code below N = 5, the outer word v is taken from x and
	// the inner words from y. Each braced block is one outer iteration: it
	// first accumulates v*y into (c2, t4..t0), then adds m*q and drops the
	// lowest word.

	var t0, t1, t2, t3, t4 uint64
	var u0, u1, u2, u3, u4 uint64
	{
		// round 0: t is empty, so (c2, t4..t0) is simply x[0]*y
		var c0, c1, c2 uint64
		v := x[0]
		// (u_j, t_j) = (hi, lo) words of v*y[j]
		u0, t0 = bits.Mul64(v, y[0])
		u1, t1 = bits.Mul64(v, y[1])
		u2, t2 = bits.Mul64(v, y[2])
		u3, t3 = bits.Mul64(v, y[3])
		u4, t4 = bits.Mul64(v, y[4])
		// add each high word into the next low word; c2 receives the top word
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, 0, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word;
		// the lowest sum word is discarded, only its carry is kept
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// t3 temporarily holds lo(m*q4) + carry; the old t4 is added last
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		// add the high words of m*q
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		// fold in the old t4 and the two top words (u4, c2)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	{
		// round 1: t += x[1]*y, then add m*q and drop the lowest word
		var c0, c1, c2 uint64
		v := x[1]
		// add the low words of v*y[j] into t_j
		u0, c1 = bits.Mul64(v, y[0])
		t0, c0 = bits.Add64(c1, t0, 0)
		u1, c1 = bits.Mul64(v, y[1])
		t1, c0 = bits.Add64(c1, t1, c0)
		u2, c1 = bits.Mul64(v, y[2])
		t2, c0 = bits.Add64(c1, t2, c0)
		u3, c1 = bits.Mul64(v, y[3])
		t3, c0 = bits.Add64(c1, t3, c0)
		u4, c1 = bits.Mul64(v, y[4])
		t4, c0 = bits.Add64(c1, t4, c0)

		// c2 captures the carry out of the low-word chain, then the
		// high words of v*y[j] are added one position up
		c2, _ = bits.Add64(0, 0, c0)
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, c2, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// add the high words of m*q, then fold in the old t4 and (u4, c2)
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	{
		// round 2: t += x[2]*y, then add m*q and drop the lowest word
		var c0, c1, c2 uint64
		v := x[2]
		// add the low words of v*y[j] into t_j
		u0, c1 = bits.Mul64(v, y[0])
		t0, c0 = bits.Add64(c1, t0, 0)
		u1, c1 = bits.Mul64(v, y[1])
		t1, c0 = bits.Add64(c1, t1, c0)
		u2, c1 = bits.Mul64(v, y[2])
		t2, c0 = bits.Add64(c1, t2, c0)
		u3, c1 = bits.Mul64(v, y[3])
		t3, c0 = bits.Add64(c1, t3, c0)
		u4, c1 = bits.Mul64(v, y[4])
		t4, c0 = bits.Add64(c1, t4, c0)

		// carry of the low-word chain into c2, then the high words one position up
		c2, _ = bits.Add64(0, 0, c0)
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, c2, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// add the high words of m*q, then fold in the old t4 and (u4, c2)
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	{
		// round 3: t += x[3]*y, then add m*q and drop the lowest word
		var c0, c1, c2 uint64
		v := x[3]
		// add the low words of v*y[j] into t_j
		u0, c1 = bits.Mul64(v, y[0])
		t0, c0 = bits.Add64(c1, t0, 0)
		u1, c1 = bits.Mul64(v, y[1])
		t1, c0 = bits.Add64(c1, t1, c0)
		u2, c1 = bits.Mul64(v, y[2])
		t2, c0 = bits.Add64(c1, t2, c0)
		u3, c1 = bits.Mul64(v, y[3])
		t3, c0 = bits.Add64(c1, t3, c0)
		u4, c1 = bits.Mul64(v, y[4])
		t4, c0 = bits.Add64(c1, t4, c0)

		// carry of the low-word chain into c2, then the high words one position up
		c2, _ = bits.Add64(0, 0, c0)
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, c2, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// add the high words of m*q, then fold in the old t4 and (u4, c2)
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	{
		// round 4: t += x[4]*y, then add m*q and drop the lowest word
		var c0, c1, c2 uint64
		v := x[4]
		// add the low words of v*y[j] into t_j
		u0, c1 = bits.Mul64(v, y[0])
		t0, c0 = bits.Add64(c1, t0, 0)
		u1, c1 = bits.Mul64(v, y[1])
		t1, c0 = bits.Add64(c1, t1, c0)
		u2, c1 = bits.Mul64(v, y[2])
		t2, c0 = bits.Add64(c1, t2, c0)
		u3, c1 = bits.Mul64(v, y[3])
		t3, c0 = bits.Add64(c1, t3, c0)
		u4, c1 = bits.Mul64(v, y[4])
		t4, c0 = bits.Add64(c1, t4, c0)

		// carry of the low-word chain into c2, then the high words one position up
		c2, _ = bits.Add64(0, 0, c0)
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, c2, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// add the high words of m*q, then fold in the old t4 and (u4, c2)
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	// store the five accumulated words into z
	z[0] = t0
	z[1] = t1
	z[2] = t2
	z[3] = t3
	z[4] = t4

	// if z ⩾ q → z -= q
	if !z.smallerThanModulus() {
		// word-by-word subtraction of q, propagating the borrow b
		var b uint64
		z[0], b = bits.Sub64(z[0], q0, 0)
		z[1], b = bits.Sub64(z[1], q1, b)
		z[2], b = bits.Sub64(z[2], q2, b)
		z[3], b = bits.Sub64(z[3], q3, b)
		z[4], _ = bits.Sub64(z[4], q4, b)
	}
	return z
}

// Square z = x * x (mod q)
//
// x must be less than q
//
// The result is stored in z and z is returned. The body has the same
// structure as Mul with both operands equal to x.
func (z *Element) Square(x *Element) *Element {
	// see Mul for algorithm documentation

	var t0, t1, t2, t3, t4 uint64
	var u0, u1, u2, u3, u4 uint64
	{
		// round 0: t is empty, so (c2, t4..t0) is simply x[0]*x
		var c0, c1, c2 uint64
		v := x[0]
		// (u_j, t_j) = (hi, lo) words of v*x[j]
		u0, t0 = bits.Mul64(v, x[0])
		u1, t1 = bits.Mul64(v, x[1])
		u2, t2 = bits.Mul64(v, x[2])
		u3, t3 = bits.Mul64(v, x[3])
		u4, t4 = bits.Mul64(v, x[4])
		// add each high word into the next low word; c2 receives the top word
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, 0, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word;
		// the lowest sum word is discarded, only its carry is kept
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// t3 temporarily holds lo(m*q4) + carry; the old t4 is added last
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		// add the high words of m*q
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		// fold in the old t4 and the two top words (u4, c2)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	{
		// round 1: t += x[1]*x, then add m*q and drop the lowest word
		var c0, c1, c2 uint64
		v := x[1]
		// add the low words of v*x[j] into t_j
		u0, c1 = bits.Mul64(v, x[0])
		t0, c0 = bits.Add64(c1, t0, 0)
		u1, c1 = bits.Mul64(v, x[1])
		t1, c0 = bits.Add64(c1, t1, c0)
		u2, c1 = bits.Mul64(v, x[2])
		t2, c0 = bits.Add64(c1, t2, c0)
		u3, c1 = bits.Mul64(v, x[3])
		t3, c0 = bits.Add64(c1, t3, c0)
		u4, c1 = bits.Mul64(v, x[4])
		t4, c0 = bits.Add64(c1, t4, c0)

		// carry of the low-word chain into c2, then the high words one position up
		c2, _ = bits.Add64(0, 0, c0)
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, c2, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// add the high words of m*q, then fold in the old t4 and (u4, c2)
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	{
		// round 2: t += x[2]*x, then add m*q and drop the lowest word
		var c0, c1, c2 uint64
		v := x[2]
		// add the low words of v*x[j] into t_j
		u0, c1 = bits.Mul64(v, x[0])
		t0, c0 = bits.Add64(c1, t0, 0)
		u1, c1 = bits.Mul64(v, x[1])
		t1, c0 = bits.Add64(c1, t1, c0)
		u2, c1 = bits.Mul64(v, x[2])
		t2, c0 = bits.Add64(c1, t2, c0)
		u3, c1 = bits.Mul64(v, x[3])
		t3, c0 = bits.Add64(c1, t3, c0)
		u4, c1 = bits.Mul64(v, x[4])
		t4, c0 = bits.Add64(c1, t4, c0)

		// carry of the low-word chain into c2, then the high words one position up
		c2, _ = bits.Add64(0, 0, c0)
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, c2, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// add the high words of m*q, then fold in the old t4 and (u4, c2)
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	{
		// round 3: t += x[3]*x, then add m*q and drop the lowest word
		var c0, c1, c2 uint64
		v := x[3]
		// add the low words of v*x[j] into t_j
		u0, c1 = bits.Mul64(v, x[0])
		t0, c0 = bits.Add64(c1, t0, 0)
		u1, c1 = bits.Mul64(v, x[1])
		t1, c0 = bits.Add64(c1, t1, c0)
		u2, c1 = bits.Mul64(v, x[2])
		t2, c0 = bits.Add64(c1, t2, c0)
		u3, c1 = bits.Mul64(v, x[3])
		t3, c0 = bits.Add64(c1, t3, c0)
		u4, c1 = bits.Mul64(v, x[4])
		t4, c0 = bits.Add64(c1, t4, c0)

		// carry of the low-word chain into c2, then the high words one position up
		c2, _ = bits.Add64(0, 0, c0)
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, c2, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// add the high words of m*q, then fold in the old t4 and (u4, c2)
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	{
		// round 4: t += x[4]*x, then add m*q and drop the lowest word
		var c0, c1, c2 uint64
		v := x[4]
		// add the low words of v*x[j] into t_j
		u0, c1 = bits.Mul64(v, x[0])
		t0, c0 = bits.Add64(c1, t0, 0)
		u1, c1 = bits.Mul64(v, x[1])
		t1, c0 = bits.Add64(c1, t1, c0)
		u2, c1 = bits.Mul64(v, x[2])
		t2, c0 = bits.Add64(c1, t2, c0)
		u3, c1 = bits.Mul64(v, x[3])
		t3, c0 = bits.Add64(c1, t3, c0)
		u4, c1 = bits.Mul64(v, x[4])
		t4, c0 = bits.Add64(c1, t4, c0)

		// carry of the low-word chain into c2, then the high words one position up
		c2, _ = bits.Add64(0, 0, c0)
		t1, c0 = bits.Add64(u0, t1, 0)
		t2, c0 = bits.Add64(u1, t2, c0)
		t3, c0 = bits.Add64(u2, t3, c0)
		t4, c0 = bits.Add64(u3, t4, c0)
		c2, _ = bits.Add64(u4, c2, c0)

		// uint64 product, i.e. taken modulo 2⁶⁴
		m := qInvNeg * t0

		// add the low words of m*q while moving t down by one word
		u0, c1 = bits.Mul64(m, q0)
		_, c0 = bits.Add64(t0, c1, 0)
		u1, c1 = bits.Mul64(m, q1)
		t0, c0 = bits.Add64(t1, c1, c0)
		u2, c1 = bits.Mul64(m, q2)
		t1, c0 = bits.Add64(t2, c1, c0)
		u3, c1 = bits.Mul64(m, q3)
		t2, c0 = bits.Add64(t3, c1, c0)
		u4, c1 = bits.Mul64(m, q4)

		// add the high words of m*q, then fold in the old t4 and (u4, c2)
		t3, c0 = bits.Add64(0, c1, c0)
		u4, _ = bits.Add64(u4, 0, c0)
		t0, c0 = bits.Add64(u0, t0, 0)
		t1, c0 = bits.Add64(u1, t1, c0)
		t2, c0 = bits.Add64(u2, t2, c0)
		t3, c0 = bits.Add64(u3, t3, c0)
		c2, _ = bits.Add64(c2, 0, c0)
		t3, c0 = bits.Add64(t4, t3, 0)
		t4, _ = bits.Add64(u4, c2, c0)

	}
	// store the five accumulated words into z
	z[0] = t0
	z[1] = t1
	z[2] = t2
	z[3] = t3
	z[4] = t4

	// if z ⩾ q → z -= q
	if !z.smallerThanModulus() {
		// word-by-word subtraction of q, propagating the borrow b
		var b uint64
		z[0], b = bits.Sub64(z[0], q0, 0)
		z[1], b = bits.Sub64(z[1], q1, b)
		z[2], b = bits.Sub64(z[2], q2, b)
		z[3], b = bits.Sub64(z[3], q3, b)
		z[4], _ = bits.Sub64(z[4], q4, b)
	}
	return z
}