github.com/platonnetwork/platon-go@v0.7.6/cases/tool/win/bls_win/include/mcl/fp_tower.hpp (about) 1 #pragma once 2 /** 3 @file 4 @brief finite field extension class 5 @author MITSUNARI Shigeo(@herumi) 6 @license modified new BSD license 7 http://opensource.org/licenses/BSD-3-Clause 8 */ 9 #include <mcl/fp.hpp> 10 11 namespace mcl { 12 13 template<class Fp> 14 class FpDblT : public fp::Serializable<FpDblT<Fp> > { 15 typedef fp::Unit Unit; 16 Unit v_[Fp::maxSize * 2]; 17 public: 18 static size_t getUnitSize() { return Fp::op_.N * 2; } 19 FpDblT() : v_() 20 { 21 } 22 FpDblT(const FpDblT& rhs) 23 { 24 const size_t n = getUnitSize(); 25 for (size_t i = 0; i < n; i++) { 26 v_[i] = rhs.v_[i]; 27 } 28 } 29 void dump() const 30 { 31 const size_t n = getUnitSize(); 32 for (size_t i = 0; i < n; i++) { 33 mcl::fp::dumpUnit(v_[n - 1 - i]); 34 } 35 printf("\n"); 36 } 37 template<class OutputStream> 38 void save(bool *pb, OutputStream& os, int) const 39 { 40 char buf[1024]; 41 size_t n = mcl::fp::arrayToHex(buf, sizeof(buf), v_, getUnitSize()); 42 if (n == 0) { 43 *pb = false; 44 return; 45 } 46 cybozu::write(pb, os, buf + sizeof(buf) - n, sizeof(buf)); 47 } 48 template<class InputStream> 49 void load(bool *pb, InputStream& is, int) 50 { 51 char buf[1024]; 52 *pb = false; 53 size_t n = fp::local::loadWord(buf, sizeof(buf), is); 54 if (n == 0) return; 55 n = fp::hexToArray(v_, getUnitSize(), buf, n); 56 if (n == 0) return; 57 for (size_t i = n; i < getUnitSize(); i++) v_[i] = 0; 58 *pb = true; 59 } 60 #ifndef CYBOZU_DONT_USE_EXCEPTION 61 template<class OutputStream> 62 void save(OutputStream& os, int ioMode = IoSerialize) const 63 { 64 bool b; 65 save(&b, os, ioMode); 66 if (!b) throw cybozu::Exception("FpDblT:save") << ioMode; 67 } 68 template<class InputStream> 69 void load(InputStream& is, int ioMode = IoSerialize) 70 { 71 bool b; 72 load(&b, is, ioMode); 73 if (!b) throw cybozu::Exception("FpDblT:load") << ioMode; 74 } 75 void getMpz(mpz_class& x) const 76 { 77 bool b; 78 getMpz(&b, x); 79 if (!b) throw cybozu::Exception("FpDblT:getMpz"); 80 } 81 mpz_class getMpz() const 82 { 83 mpz_class x; 84 getMpz(x); 85 return x; 86 } 87 #endif 88 void clear() 89 { 90 const size_t n = getUnitSize(); 91 for (size_t i = 0; i < n; i++) { 92 v_[i] = 0; 93 } 94 } 95 FpDblT& operator=(const FpDblT& rhs) 96 { 97 const size_t n = getUnitSize(); 98 for (size_t i = 0; i < n; i++) { 99 v_[i] = rhs.v_[i]; 100 } 101 return *this; 102 } 103 // QQQ : does not check range of x strictly(use for debug) 104 void setMpz(const mpz_class& x) 105 { 106 assert(x >= 0); 107 const size_t xn = gmp::getUnitSize(x); 108 const size_t N2 = getUnitSize(); 109 if (xn > N2) { 110 assert(0); 111 return; 112 } 113 memcpy(v_, gmp::getUnit(x), xn * sizeof(Unit)); 114 memset(v_ + xn, 0, (N2 - xn) * sizeof(Unit)); 115 } 116 void getMpz(bool *pb, mpz_class& x) const 117 { 118 gmp::setArray(pb, x, v_, Fp::op_.N * 2); 119 } 120 #ifdef MCL_XBYAK_DIRECT_CALL 121 static void (*add)(FpDblT& z, const FpDblT& x, const FpDblT& y); 122 static void (*sub)(FpDblT& z, const FpDblT& x, const FpDblT& y); 123 static void (*mod)(Fp& z, const FpDblT& xy); 124 static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); 125 static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y); 126 static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } 127 static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } 128 static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } 129 static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } 130 static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } 131 #else 132 static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); } 133 static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); } 134 static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); } 135 static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); } 136 static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); } 137 #endif 138 static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); } 139 static void sqrPreC(FpDblT& xx, const Fp& x) { Fp::op_.fpDbl_sqrPre(xx.v_, x.v_); } 140 /* 141 mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy) 142 */ 143 static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y); 144 static void (*sqrPre)(FpDblT& xx, const Fp& x); 145 static void mulUnit(FpDblT& z, const FpDblT& x, Unit y) 146 { 147 if (mulSmallUnit(z, x, y)) return; 148 assert(0); // not supported y 149 } 150 static void init() 151 { 152 const mcl::fp::Op& op = Fp::getOp(); 153 #ifdef MCL_XBYAK_DIRECT_CALL 154 add = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_addA_); 155 if (add == 0) add = addC; 156 sub = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subA_); 157 if (sub == 0) sub = subC; 158 mod = fp::func_ptr_cast<void (*)(Fp&, const FpDblT&)>(op.fpDbl_modA_); 159 if (mod == 0) mod = modC; 160 addPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_addPre); 161 if (addPre == 0) addPre = addPreC; 162 subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre); 163 if (subPre == 0) subPre = subPreC; 164 #endif 165 if (op.fpDbl_mulPreA_) { 166 mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_); 167 } else { 168 mulPre = mulPreC; 169 } 170 if (op.fpDbl_sqrPreA_) { 171 sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPreA_); 172 } else { 173 sqrPre = sqrPreC; 174 } 175 } 176 void operator+=(const FpDblT& x) { add(*this, *this, x); } 177 void operator-=(const FpDblT& x) { sub(*this, *this, x); } 178 }; 179 180 #ifdef MCL_XBYAK_DIRECT_CALL 181 template<class Fp> void (*FpDblT<Fp>::add)(FpDblT&, const FpDblT&, const FpDblT&); 182 template<class Fp> void (*FpDblT<Fp>::sub)(FpDblT&, const FpDblT&, const FpDblT&); 183 template<class Fp> void (*FpDblT<Fp>::mod)(Fp&, const FpDblT&); 184 template<class Fp> void (*FpDblT<Fp>::addPre)(FpDblT&, const FpDblT&, const FpDblT&); 185 template<class Fp> void (*FpDblT<Fp>::subPre)(FpDblT&, const FpDblT&, const FpDblT&); 186 #endif 187 template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&); 188 template<class Fp> void (*FpDblT<Fp>::sqrPre)(FpDblT&, const Fp&); 189 190 template<class Fp> struct Fp12T; 191 template<class Fp> class BNT; 192 template<class Fp> struct Fp2DblT; 193 /* 194 beta = -1 195 Fp2 = F[i] / (i^2 + 1) 196 x = a + bi 197 */ 198 template<class _Fp> 199 class Fp2T : public fp::Serializable<Fp2T<_Fp>, 200 fp::Operator<Fp2T<_Fp> > > { 201 typedef _Fp Fp; 202 typedef fp::Unit Unit; 203 typedef FpDblT<Fp> FpDbl; 204 typedef Fp2DblT<Fp> Fp2Dbl; 205 static const size_t gN = 5; 206 /* 207 g = xi^((p - 1) / 6) 208 g[] = { g^2, g^4, g^1, g^3, g^5 } 209 */ 210 static Fp2T g[gN]; 211 static Fp2T g2[gN]; 212 static Fp2T g3[gN]; 213 public: 214 static const Fp2T *get_gTbl() { return &g[0]; } 215 static const Fp2T *get_g2Tbl() { return &g2[0]; } 216 static const Fp2T *get_g3Tbl() { return &g3[0]; } 217 typedef typename Fp::BaseFp BaseFp; 218 static const size_t maxSize = Fp::maxSize * 2; 219 static inline size_t getByteSize() { return Fp::getByteSize() * 2; } 220 void dump() const 221 { 222 a.dump(); 223 b.dump(); 224 } 225 Fp a, b; 226 Fp2T() { } 227 Fp2T(int64_t a) : a(a), b(0) { } 228 Fp2T(const Fp& a, const Fp& b) : a(a), b(b) { } 229 Fp2T(int64_t a, int64_t b) : a(a), b(b) { } 230 Fp* getFp0() { return &a; } 231 const Fp* getFp0() const { return &a; } 232 const Unit* getUnit() const { return a.getUnit(); } 233 void clear() 234 { 235 a.clear(); 236 b.clear(); 237 } 238 void set(const Fp &a_, const Fp &b_) 239 { 240 a = a_; 241 b = b_; 242 } 243 #ifdef MCL_XBYAK_DIRECT_CALL 244 static void (*add)(Fp2T& z, const Fp2T& x, const Fp2T& y); 245 static void (*sub)(Fp2T& z, const Fp2T& x, const Fp2T& y); 246 static void (*neg)(Fp2T& y, const Fp2T& x); 247 static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); 248 static void (*sqr)(Fp2T& y, const Fp2T& x); 249 #else 250 static void add(Fp2T& z, const Fp2T& x, const Fp2T& y) { addC(z, x, y); } 251 static void sub(Fp2T& z, const Fp2T& x, const Fp2T& y) { subC(z, x, y); } 252 static void neg(Fp2T& y, const Fp2T& x) { negC(y, x); } 253 static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) { mulC(z, x, y); } 254 static void sqr(Fp2T& y, const Fp2T& x) { sqrC(y, x); } 255 #endif 256 static void (*mul_xi)(Fp2T& y, const Fp2T& x); 257 static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); } 258 static void inv(Fp2T& y, const Fp2T& x) { Fp::op_.fp2_inv(y.a.v_, x.a.v_); } 259 static void divBy2(Fp2T& y, const Fp2T& x) 260 { 261 Fp::divBy2(y.a, x.a); 262 Fp::divBy2(y.b, x.b); 263 } 264 static void divBy4(Fp2T& y, const Fp2T& x) 265 { 266 Fp::divBy4(y.a, x.a); 267 Fp::divBy4(y.b, x.b); 268 } 269 static void mulFp(Fp2T& z, const Fp2T& x, const Fp& y) 270 { 271 Fp::mul(z.a, x.a, y); 272 Fp::mul(z.b, x.b, y); 273 } 274 template<class S> 275 void setArray(bool *pb, const S *buf, size_t n) 276 { 277 assert((n & 1) == 0); 278 n /= 2; 279 a.setArray(pb, buf, n); 280 if (!*pb) return; 281 b.setArray(pb, buf + n, n); 282 } 283 template<class InputStream> 284 void load(bool *pb, InputStream& is, int ioMode) 285 { 286 Fp *ap = &a, *bp = &b; 287 if (Fp::isETHserialization_ && ioMode & (IoSerialize | IoSerializeHexStr)) { 288 fp::swap_(ap, bp); 289 } 290 ap->load(pb, is, ioMode); 291 if (!*pb) return; 292 bp->load(pb, is, ioMode); 293 } 294 /* 295 Fp2T = <a> + ' ' + <b> 296 */ 297 template<class OutputStream> 298 void save(bool *pb, OutputStream& os, int ioMode) const 299 { 300 const Fp *ap = &a, *bp = &b; 301 if (Fp::isETHserialization_ && ioMode & (IoSerialize | IoSerializeHexStr)) { 302 fp::swap_(ap, bp); 303 } 304 const char sep = *fp::getIoSeparator(ioMode); 305 ap->save(pb, os, ioMode); 306 if (!*pb) return; 307 if (sep) { 308 cybozu::writeChar(pb, os, sep); 309 if (!*pb) return; 310 } 311 bp->save(pb, os, ioMode); 312 } 313 bool isZero() const { return a.isZero() && b.isZero(); } 314 bool isOne() const { return a.isOne() && b.isZero(); } 315 bool operator==(const Fp2T& rhs) const { return a == rhs.a && b == rhs.b; } 316 bool operator!=(const Fp2T& rhs) const { return !operator==(rhs); } 317 /* 318 return true is a is odd (do not consider b) 319 this function is for only compressed reprezentation of EC 320 isOdd() is not good naming. QQQ 321 */ 322 bool isOdd() const { return a.isOdd(); } 323 /* 324 (a + bi)^2 = (a^2 - b^2) + 2ab i = c + di 325 A = a^2 326 B = b^2 327 A = (c +/- sqrt(c^2 + d^2))/2 328 b = d / 2a 329 */ 330 static inline bool squareRoot(Fp2T& y, const Fp2T& x) 331 { 332 Fp t1, t2; 333 if (x.b.isZero()) { 334 if (Fp::squareRoot(t1, x.a)) { 335 y.a = t1; 336 y.b.clear(); 337 } else { 338 bool b = Fp::squareRoot(t1, -x.a); 339 assert(b); (void)b; 340 y.a.clear(); 341 y.b = t1; 342 } 343 return true; 344 } 345 Fp::sqr(t1, x.a); 346 Fp::sqr(t2, x.b); 347 t1 += t2; // c^2 + d^2 348 if (!Fp::squareRoot(t1, t1)) return false; 349 Fp::add(t2, x.a, t1); 350 Fp::divBy2(t2, t2); 351 if (!Fp::squareRoot(t2, t2)) { 352 Fp::sub(t2, x.a, t1); 353 Fp::divBy2(t2, t2); 354 bool b = Fp::squareRoot(t2, t2); 355 assert(b); (void)b; 356 } 357 y.a = t2; 358 t2 += t2; 359 Fp::inv(t2, t2); 360 Fp::mul(y.b, x.b, t2); 361 return true; 362 } 363 static void inline norm(Fp& y, const Fp2T& x) 364 { 365 Fp aa, bb; 366 Fp::sqr(aa, x.a); 367 Fp::sqr(bb, x.b); 368 Fp::add(y, aa, bb); 369 } 370 /* 371 Frobenius 372 i^2 = -1 373 (a + bi)^p = a + bi^p in Fp 374 = a + bi if p = 1 mod 4 375 = a - bi if p = 3 mod 4 376 */ 377 static void Frobenius(Fp2T& y, const Fp2T& x) 378 { 379 if (Fp::getOp().pmod4 == 1) { 380 if (&y != &x) { 381 y = x; 382 } 383 } else { 384 if (&y != &x) { 385 y.a = x.a; 386 } 387 Fp::neg(y.b, x.b); 388 } 389 } 390 391 static uint32_t get_xi_a() { return Fp::getOp().xi_a; } 392 static void init() 393 { 394 // assert(Fp::maxSize <= 256); 395 mcl::fp::Op& op = Fp::op_; 396 assert(op.xi_a); 397 mul_xi = 0; 398 #ifdef MCL_XBYAK_DIRECT_CALL 399 add = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_addA_); 400 if (add == 0) add = addC; 401 sub = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_subA_); 402 if (sub == 0) sub = subC; 403 neg = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_negA_); 404 if (neg == 0) neg = negC; 405 mul = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_mulA_); 406 if (mul == 0) mul = mulC; 407 sqr = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_sqrA_); 408 if (sqr == 0) sqr = sqrC; 409 mul_xi = fp::func_ptr_cast<void (*)(Fp2T&, const Fp2T&)>(op.fp2_mul_xiA_); 410 #endif 411 op.fp2_inv = fp2_invW; 412 if (mul_xi == 0) { 413 if (op.xi_a == 1) { 414 mul_xi = fp2_mul_xi_1_1iC; 415 } else { 416 mul_xi = fp2_mul_xiC; 417 } 418 } 419 FpDblT<Fp>::init(); 420 Fp2DblT<Fp>::init(); 421 // call init before Fp2::pow because FpDbl is used in Fp2T 422 const Fp2T xi(op.xi_a, 1); 423 const mpz_class& p = Fp::getOp().mp; 424 Fp2T::pow(g[0], xi, (p - 1) / 6); // g = xi^((p-1)/6) 425 for (size_t i = 1; i < gN; i++) { 426 g[i] = g[i - 1] * g[0]; 427 } 428 /* 429 permutate [0, 1, 2, 3, 4] => [1, 3, 0, 2, 4] 430 g[0] = g^2 431 g[1] = g^4 432 g[2] = g^1 433 g[3] = g^3 434 g[4] = g^5 435 */ 436 { 437 Fp2T t = g[0]; 438 g[0] = g[1]; 439 g[1] = g[3]; 440 g[3] = g[2]; 441 g[2] = t; 442 } 443 for (size_t i = 0; i < gN; i++) { 444 Fp2T t(g[i].a, g[i].b); 445 if (Fp::getOp().pmod4 == 3) Fp::neg(t.b, t.b); 446 Fp2T::mul(g2[i], t, g[i]); 447 g3[i] = g[i] * g2[i]; 448 } 449 } 450 #ifndef CYBOZU_DONT_USE_EXCEPTION 451 template<class InputStream> 452 void load(InputStream& is, int ioMode = IoSerialize) 453 { 454 bool b; 455 load(&b, is, ioMode); 456 if (!b) throw cybozu::Exception("Fp2T:load"); 457 } 458 template<class OutputStream> 459 void save(OutputStream& os, int ioMode = IoSerialize) const 460 { 461 bool b; 462 save(&b, os, ioMode); 463 if (!b) throw cybozu::Exception("Fp2T:save"); 464 } 465 template<class S> 466 void setArray(const S *buf, size_t n) 467 { 468 bool b; 469 setArray(&b, buf, n); 470 if (!b) throw cybozu::Exception("Fp2T:setArray"); 471 } 472 #endif 473 #ifndef CYBOZU_DONT_USE_STRING 474 Fp2T(const std::string& a, const std::string& b, int base = 0) : a(a, base), b(b, base) {} 475 friend std::istream& operator>>(std::istream& is, Fp2T& self) 476 { 477 self.load(is, fp::detectIoMode(Fp::BaseFp::getIoMode(), is)); 478 return is; 479 } 480 friend std::ostream& operator<<(std::ostream& os, const Fp2T& self) 481 { 482 self.save(os, fp::detectIoMode(Fp::BaseFp::getIoMode(), os)); 483 return os; 484 } 485 #endif 486 private: 487 /* 488 default Fp2T operator 489 Fp2T = Fp[i]/(i^2 + 1) 490 */ 491 static void addC(Fp2T& z, const Fp2T& x, const Fp2T& y) 492 { 493 Fp::add(z.a, x.a, y.a); 494 Fp::add(z.b, x.b, y.b); 495 } 496 static void subC(Fp2T& z, const Fp2T& x, const Fp2T& y) 497 { 498 Fp::sub(z.a, x.a, y.a); 499 Fp::sub(z.b, x.b, y.b); 500 } 501 static void negC(Fp2T& y, const Fp2T& x) 502 { 503 Fp::neg(y.a, x.a); 504 Fp::neg(y.b, x.b); 505 } 506 #if 0 507 /* 508 x = a + bi, y = c + di, i^2 = -1 509 z = xy = (a + bi)(c + di) = (ac - bd) + (ad + bc)i 510 ad+bc = (a + b)(c + d) - ac - bd 511 # of mod = 3 512 */ 513 static void fp2_mulW(Unit *z, const Unit *x, const Unit *y) 514 { 515 const Fp *px = reinterpret_cast<const Fp*>(x); 516 const Fp *py = reinterpret_cast<const Fp*>(y); 517 const Fp& a = px[0]; 518 const Fp& b = px[1]; 519 const Fp& c = py[0]; 520 const Fp& d = py[1]; 521 Fp *pz = reinterpret_cast<Fp*>(z); 522 Fp t1, t2, ac, bd; 523 Fp::add(t1, a, b); 524 Fp::add(t2, c, d); 525 t1 *= t2; // (a + b)(c + d) 526 Fp::mul(ac, a, c); 527 Fp::mul(bd, b, d); 528 Fp::sub(pz[0], ac, bd); // ac - bd 529 Fp::sub(pz[1], t1, ac); 530 pz[1] -= bd; 531 } 532 static void fp2_mulNFW(Fp2T& z, const Fp2T& x, const Fp2T& y) 533 { 534 const fp::Op& op = Fp::op_; 535 op.fp2_mulNF((Unit*)&z, (const Unit*)&x, (const Unit*)&y, op.p); 536 } 537 #endif 538 static void mulC(Fp2T& z, const Fp2T& x, const Fp2T& y) 539 { 540 Fp2Dbl d; 541 Fp2Dbl::mulPre(d, x, y); 542 FpDbl::mod(z.a, d.a); 543 FpDbl::mod(z.b, d.b); 544 } 545 /* 546 x = a + bi, i^2 = -1 547 y = x^2 = (a + bi)^2 = (a + b)(a - b) + 2abi 548 */ 549 static void sqrC(Fp2T& y, const Fp2T& x) 550 { 551 const Fp& a = x.a; 552 const Fp& b = x.b; 553 #if 1 // faster than using FpDbl 554 Fp t1, t2, t3; 555 Fp::add(t1, b, b); // 2b 556 t1 *= a; // 2ab 557 Fp::add(t2, a, b); // a + b 558 Fp::sub(t3, a, b); // a - b 559 Fp::mul(y.a, t2, t3); // (a + b)(a - b) 560 y.b = t1; 561 #else 562 Fp t1, t2; 563 FpDbl d1, d2; 564 Fp::addPre(t1, b, b); // 2b 565 FpDbl::mulPre(d2, t1, a); // 2ab 566 Fp::addPre(t1, a, b); // a + b 567 Fp::sub(t2, a, b); // a - b 568 FpDbl::mulPre(d1, t1, t2); // (a + b)(a - b) 569 FpDbl::mod(py[0], d1); 570 FpDbl::mod(py[1], d2); 571 #endif 572 } 573 /* 574 xi = xi_a + i 575 x = a + bi 576 y = (a + bi)xi = (a + bi)(xi_a + i) 577 =(a * x_ia - b) + (a + b xi_a)i 578 */ 579 static void fp2_mul_xiC(Fp2T& y, const Fp2T& x) 580 { 581 const Fp& a = x.a; 582 const Fp& b = x.b; 583 Fp t; 584 Fp::mulUnit(t, a, Fp::getOp().xi_a); 585 t -= b; 586 Fp::mulUnit(y.b, b, Fp::getOp().xi_a); 587 y.b += a; 588 y.a = t; 589 } 590 /* 591 xi = 1 + i ; xi_a = 1 592 y = (a + bi)xi = (a - b) + (a + b)i 593 */ 594 static void fp2_mul_xi_1_1iC(Fp2T& y, const Fp2T& x) 595 { 596 const Fp& a = x.a; 597 const Fp& b = x.b; 598 Fp t; 599 Fp::add(t, a, b); 600 Fp::sub(y.a, a, b); 601 y.b = t; 602 } 603 /* 604 x = a + bi 605 1 / x = (a - bi) / (a^2 + b^2) 606 */ 607 static void fp2_invW(Unit *y, const Unit *x) 608 { 609 const Fp *px = reinterpret_cast<const Fp*>(x); 610 Fp *py = reinterpret_cast<Fp*>(y); 611 const Fp& a = px[0]; 612 const Fp& b = px[1]; 613 Fp aa, bb; 614 Fp::sqr(aa, a); 615 Fp::sqr(bb, b); 616 aa += bb; 617 Fp::inv(aa, aa); // aa = 1 / (a^2 + b^2) 618 Fp::mul(py[0], a, aa); 619 Fp::mul(py[1], b, aa); 620 Fp::neg(py[1], py[1]); 621 } 622 }; 623 624 #ifdef MCL_XBYAK_DIRECT_CALL 625 template<class Fp_> void (*Fp2T<Fp_>::add)(Fp2T& z, const Fp2T& x, const Fp2T& y); 626 template<class Fp_> void (*Fp2T<Fp_>::sub)(Fp2T& z, const Fp2T& x, const Fp2T& y); 627 template<class Fp_> void (*Fp2T<Fp_>::neg)(Fp2T& y, const Fp2T& x); 628 template<class Fp_> void (*Fp2T<Fp_>::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y); 629 template<class Fp_> void (*Fp2T<Fp_>::sqr)(Fp2T& y, const Fp2T& x); 630 #endif 631 template<class Fp_> void (*Fp2T<Fp_>::mul_xi)(Fp2T& y, const Fp2T& x); 632 633 template<class Fp> 634 struct Fp2DblT { 635 typedef FpDblT<Fp> FpDbl; 636 typedef Fp2T<Fp> Fp2; 637 typedef fp::Unit Unit; 638 FpDbl a, b; 639 static void add(Fp2DblT& z, const Fp2DblT& x, const Fp2DblT& y) 640 { 641 FpDbl::add(z.a, x.a, y.a); 642 FpDbl::add(z.b, x.b, y.b); 643 } 644 static void addPre(Fp2DblT& z, const Fp2DblT& x, const Fp2DblT& y) 645 { 646 FpDbl::addPre(z.a, x.a, y.a); 647 FpDbl::addPre(z.b, x.b, y.b); 648 } 649 static void sub(Fp2DblT& z, const Fp2DblT& x, const Fp2DblT& y) 650 { 651 FpDbl::sub(z.a, x.a, y.a); 652 FpDbl::sub(z.b, x.b, y.b); 653 } 654 static void subPre(Fp2DblT& z, const Fp2DblT& x, const Fp2DblT& y) 655 { 656 FpDbl::subPre(z.a, x.a, y.a); 657 FpDbl::subPre(z.b, x.b, y.b); 658 } 659 static void neg(Fp2DblT& y, const Fp2DblT& x) 660 { 661 FpDbl::neg(y.a, x.a); 662 FpDbl::neg(y.b, x.b); 663 } 664 static void mul_xi(Fp2DblT& y, const Fp2DblT& x) 665 { 666 const uint32_t xi_a = Fp2::get_xi_a(); 667 if (xi_a == 1) { 668 FpDbl t; 669 FpDbl::add(t, x.a, x.b); 670 FpDbl::sub(y.a, x.a, x.b); 671 y.b = t; 672 } else { 673 FpDbl t; 674 FpDbl::mulUnit(t, x.a, xi_a); 675 FpDbl::sub(t, t, x.b); 676 FpDbl::mulUnit(y.b, x.b, xi_a); 677 FpDbl::add(y.b, y.b, x.a); 678 y.a = t; 679 } 680 } 681 static void (*mulPre)(Fp2DblT&, const Fp2&, const Fp2&); 682 static void (*sqrPre)(Fp2DblT&, const Fp2&); 683 static void mod(Fp2& y, const Fp2DblT& x) 684 { 685 FpDbl::mod(y.a, x.a); 686 FpDbl::mod(y.b, x.b); 687 } 688 #ifndef CYBOZU_DONT_USE_STRING 689 friend std::ostream& operator<<(std::ostream& os, const Fp2DblT& x) 690 { 691 return os << x.a << ' ' << x.b; 692 } 693 #endif 694 void operator+=(const Fp2DblT& x) { add(*this, *this, x); } 695 void operator-=(const Fp2DblT& x) { sub(*this, *this, x); } 696 static void init() 697 { 698 const mcl::fp::Op& op = Fp::getOp(); 699 if (op.fp2Dbl_mulPreA_) { 700 mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_); 701 } else { 702 if (op.isFullBit) { 703 mulPre = fp2Dbl_mulPreW<true>; 704 } else { 705 mulPre = fp2Dbl_mulPreW<false>; 706 } 707 } 708 if (op.fp2Dbl_sqrPreA_) { 709 sqrPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&)>(op.fp2Dbl_sqrPreA_); 710 } else { 711 if (op.isFullBit) { 712 sqrPre = fp2Dbl_sqrPreW<true>; 713 } else { 714 sqrPre = fp2Dbl_sqrPreW<false>; 715 } 716 } 717 } 718 /* 719 Fp2Dbl::mulPre by FpDblT 720 @note mod of NIST_P192 is fast 721 */ 722 template<bool isFullBit> 723 static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y) 724 { 725 const Fp& a = x.a; 726 const Fp& b = x.b; 727 const Fp& c = y.a; 728 const Fp& d = y.b; 729 FpDbl& d0 = z.a; 730 FpDbl& d1 = z.b; 731 FpDbl d2; 732 Fp s, t; 733 if (isFullBit) { 734 Fp::add(s, a, b); 735 Fp::add(t, c, d); 736 } else { 737 Fp::addPre(s, a, b); 738 Fp::addPre(t, c, d); 739 } 740 FpDbl::mulPre(d1, s, t); // (a + b)(c + d) 741 FpDbl::mulPre(d0, a, c); 742 FpDbl::mulPre(d2, b, d); 743 if (isFullBit) { 744 FpDbl::sub(d1, d1, d0); // (a + b)(c + d) - ac 745 FpDbl::sub(d1, d1, d2); // (a + b)(c + d) - ac - bd 746 } else { 747 FpDbl::subPre(d1, d1, d0); 748 FpDbl::subPre(d1, d1, d2); 749 } 750 FpDbl::sub(d0, d0, d2); // ac - bd 751 } 752 template<bool isFullBit> 753 static void fp2Dbl_sqrPreW(Fp2DblT& y, const Fp2& x) 754 { 755 Fp t1, t2; 756 if (isFullBit) { 757 Fp::add(t1, x.b, x.b); // 2b 758 Fp::add(t2, x.a, x.b); // a + b 759 } else { 760 Fp::addPre(t1, x.b, x.b); // 2b 761 Fp::addPre(t2, x.a, x.b); // a + b 762 } 763 FpDbl::mulPre(y.b, t1, x.a); // 2ab 764 Fp::sub(t1, x.a, x.b); // a - b 765 FpDbl::mulPre(y.a, t1, t2); // (a + b)(a - b) 766 } 767 }; 768 769 template<class Fp> void (*Fp2DblT<Fp>::mulPre)(Fp2DblT&, const Fp2T<Fp>&, const Fp2T<Fp>&); 770 template<class Fp> void (*Fp2DblT<Fp>::sqrPre)(Fp2DblT&, const Fp2T<Fp>&); 771 772 template<class Fp> Fp2T<Fp> Fp2T<Fp>::g[Fp2T<Fp>::gN]; 773 template<class Fp> Fp2T<Fp> Fp2T<Fp>::g2[Fp2T<Fp>::gN]; 774 template<class Fp> Fp2T<Fp> Fp2T<Fp>::g3[Fp2T<Fp>::gN]; 775 776 template<class Fp> 777 struct Fp6DblT; 778 /* 779 Fp6T = Fp2[v] / (v^3 - xi) 780 x = a + b v + c v^2 781 */ 782 template<class _Fp> 783 struct Fp6T : public fp::Serializable<Fp6T<_Fp>, 784 fp::Operator<Fp6T<_Fp> > > { 785 typedef _Fp Fp; 786 typedef Fp2T<Fp> Fp2; 787 typedef Fp2DblT<Fp> Fp2Dbl; 788 typedef Fp6DblT<Fp> Fp6Dbl; 789 typedef Fp BaseFp; 790 Fp2 a, b, c; 791 Fp6T() { } 792 Fp6T(int64_t a) : a(a) , b(0) , c(0) { } 793 Fp6T(const Fp2& a, const Fp2& b, const Fp2& c) : a(a) , b(b) , c(c) { } 794 void clear() 795 { 796 a.clear(); 797 b.clear(); 798 c.clear(); 799 } 800 Fp* getFp0() { return a.getFp0(); } 801 const Fp* getFp0() const { return a.getFp0(); } 802 Fp2* getFp2() { return &a; } 803 const Fp2* getFp2() const { return &a; } 804 void set(const Fp2 &a_, const Fp2 &b_, const Fp2 &c_) 805 { 806 a = a_; 807 b = b_; 808 c = c_; 809 } 810 bool isZero() const 811 { 812 return a.isZero() && b.isZero() && c.isZero(); 813 } 814 bool isOne() const 815 { 816 return a.isOne() && b.isZero() && c.isZero(); 817 } 818 bool operator==(const Fp6T& rhs) const 819 { 820 return a == rhs.a && b == rhs.b && c == rhs.c; 821 } 822 bool operator!=(const Fp6T& rhs) const { return !operator==(rhs); } 823 template<class InputStream> 824 void load(bool *pb, InputStream& is, int ioMode) 825 { 826 a.load(pb, is, ioMode); if (!*pb) return; 827 b.load(pb, is, ioMode); if (!*pb) return; 828 c.load(pb, is, ioMode); if (!*pb) return; 829 } 830 template<class OutputStream> 831 void save(bool *pb, OutputStream& os, int ioMode) const 832 { 833 const char sep = *fp::getIoSeparator(ioMode); 834 a.save(pb, os, ioMode); if (!*pb) return; 835 if (sep) { 836 cybozu::writeChar(pb, os, sep); 837 if (!*pb) return; 838 } 839 b.save(pb, os, ioMode); if (!*pb) return; 840 if (sep) { 841 cybozu::writeChar(pb, os, sep); 842 if (!*pb) return; 843 } 844 c.save(pb, os, ioMode); 845 } 846 #ifndef CYBOZU_DONT_USE_EXCEPTION 847 template<class InputStream> 848 void load(InputStream& is, int ioMode = IoSerialize) 849 { 850 bool b; 851 load(&b, is, ioMode); 852 if (!b) throw cybozu::Exception("Fp6T:load"); 853 } 854 template<class OutputStream> 855 void save(OutputStream& os, int ioMode = IoSerialize) const 856 { 857 bool b; 858 save(&b, os, ioMode); 859 if (!b) throw cybozu::Exception("Fp6T:save"); 860 } 861 #endif 862 #ifndef CYBOZU_DONT_USE_STRING 863 friend std::istream& operator>>(std::istream& is, Fp6T& self) 864 { 865 self.load(is, fp::detectIoMode(Fp::BaseFp::getIoMode(), is)); 866 return is; 867 } 868 friend std::ostream& operator<<(std::ostream& os, const Fp6T& self) 869 { 870 self.save(os, fp::detectIoMode(Fp::BaseFp::getIoMode(), os)); 871 return os; 872 } 873 #endif 874 static void add(Fp6T& z, const Fp6T& x, const Fp6T& y) 875 { 876 Fp2::add(z.a, x.a, y.a); 877 Fp2::add(z.b, x.b, y.b); 878 Fp2::add(z.c, x.c, y.c); 879 } 880 static void sub(Fp6T& z, const Fp6T& x, const Fp6T& y) 881 { 882 Fp2::sub(z.a, x.a, y.a); 883 Fp2::sub(z.b, x.b, y.b); 884 Fp2::sub(z.c, x.c, y.c); 885 } 886 static void neg(Fp6T& y, const Fp6T& x) 887 { 888 Fp2::neg(y.a, x.a); 889 Fp2::neg(y.b, x.b); 890 Fp2::neg(y.c, x.c); 891 } 892 /* 893 x = a + bv + cv^2, v^3 = xi 894 x^2 = (a^2 + 2bc xi) + (c^2 xi + 2ab)v + (b^2 + 2ac)v^2 895 896 b^2 + 2ac = (a + b + c)^2 - a^2 - 2bc - c^2 - 2ab 897 */ 898 static void sqr(Fp6T& y, const Fp6T& x) 899 { 900 Fp2 t1, t2, t3; 901 Fp2::mul(t1, x.a, x.b); 902 t1 += t1; // 2ab 903 Fp2::mul(t2, x.b, x.c); 904 t2 += t2; // 2bc 905 Fp2::sqr(t3, x.c); // c^2 906 Fp2::add(y.c, x.a, x.c); // a + c, destroy y.c 907 y.c += x.b; // a + b + c 908 Fp2::sqr(y.b, y.c); // (a + b + c)^2, destroy y.b 909 y.b -= t2; // (a + b + c)^2 - 2bc 910 Fp2::mul_xi(t2, t2); // 2bc xi 911 Fp2::sqr(y.a, x.a); // a^2, destroy y.a 912 y.b -= y.a; // (a + b + c)^2 - 2bc - a^2 913 y.a += t2; // a^2 + 2bc xi 914 Fp2::sub(y.c, y.b, t3); // (a + b + c)^2 - 2bc - a^2 - c^2 915 Fp2::mul_xi(y.b, t3); // c^2 xi 916 y.b += t1; // c^2 xi + 2ab 917 y.c -= t1; // b^2 + 2ac 918 } 919 static inline void mul(Fp6T& z, const Fp6T& x, const Fp6T& y); 920 /* 921 x = a + bv + cv^2, v^3 = xi 922 y = 1/x = p/q where 923 p = (a^2 - bc xi) + (c^2 xi - ab)v + (b^2 - ac)v^2 924 q = c^3 xi^2 + b(b^2 - 3ac)xi + a^3 925 = (a^2 - bc xi)a + ((c^2 xi - ab)c + (b^2 - ac)b) xi 926 */ 927 static void inv(Fp6T& y, const Fp6T& x) 928 { 929 const Fp2& a = x.a; 930 const Fp2& b = x.b; 931 const Fp2& c = x.c; 932 Fp2 aa, bb, cc, ab, bc, ac; 933 Fp2::sqr(aa, a); 934 Fp2::sqr(bb, b); 935 Fp2::sqr(cc, c); 936 Fp2::mul(ab, a, b); 937 Fp2::mul(bc, b, c); 938 Fp2::mul(ac, c, a); 939 940 Fp6T p; 941 Fp2::mul_xi(p.a, bc); 942 Fp2::sub(p.a, aa, p.a); // a^2 - bc xi 943 Fp2::mul_xi(p.b, cc); 944 p.b -= ab; // c^2 xi - ab 945 Fp2::sub(p.c, bb, ac); // b^2 - ac 946 Fp2 q, t; 947 Fp2::mul(q, p.b, c); 948 Fp2::mul(t, p.c, b); 949 q += t; 950 Fp2::mul_xi(q, q); 951 Fp2::mul(t, p.a, a); 952 q += t; 953 Fp2::inv(q, q); 954 955 Fp2::mul(y.a, p.a, q); 956 Fp2::mul(y.b, p.b, q); 957 Fp2::mul(y.c, p.c, q); 958 } 959 }; 960 961 template<class Fp> 962 struct Fp6DblT { 963 typedef Fp2T<Fp> Fp2; 964 typedef Fp6T<Fp> Fp6; 965 typedef Fp2DblT<Fp> Fp2Dbl; 966 typedef Fp6DblT<Fp> Fp6Dbl; 967 typedef fp::Unit Unit; 968 Fp2Dbl a, b, c; 969 static void add(Fp6Dbl& z, const Fp6Dbl& x, const Fp6Dbl& y) 970 { 971 Fp2Dbl::add(z.a, x.a, y.a); 972 Fp2Dbl::add(z.b, x.b, y.b); 973 Fp2Dbl::add(z.c, x.c, y.c); 974 } 975 static void sub(Fp6Dbl& z, const Fp6Dbl& x, const Fp6Dbl& y) 976 { 977 Fp2Dbl::sub(z.a, x.a, y.a); 978 Fp2Dbl::sub(z.b, x.b, y.b); 979 Fp2Dbl::sub(z.c, x.c, y.c); 980 } 981 /* 982 x = a + bv + cv^2, y = d + ev + fv^2, v^3 = xi 983 xy = (ad + (bf + ce)xi) + ((ae + bd) + cf xi)v + ((af + cd) + be)v^2 984 bf + ce = (b + c)(e + f) - be - cf 985 ae + bd = (a + b)(e + d) - ad - be 986 af + cd = (a + c)(d + f) - ad - cf 987 */ 988 static void mulPre(Fp6DblT& z, const Fp6& x, const Fp6& y) 989 { 990 //clk.begin(); 991 const Fp2& a = x.a; 992 const Fp2& b = x.b; 993 const Fp2& c = x.c; 994 const Fp2& d = y.a; 995 const Fp2& e = y.b; 996 const Fp2& f = y.c; 997 Fp2Dbl& za = z.a; 998 Fp2Dbl& zb = z.b; 999 Fp2Dbl& zc = z.c; 1000 Fp2Dbl BE; 1001 Fp2Dbl::mulPre(za, a, d); 1002 Fp2Dbl::mulPre(BE, b, e); 1003 Fp2Dbl::mulPre(zb, c, f); 1004 1005 Fp2 t1, t2, t3, t4; 1006 Fp2::add(t1, b, c); 1007 Fp2::add(t2, e, f); 1008 Fp2Dbl T1; 1009 Fp2Dbl::mulPre(T1, t1, t2); 1010 Fp2Dbl::sub(T1, T1, BE); 1011 Fp2Dbl::sub(T1, T1, zb); 1012 Fp2Dbl::mul_xi(T1, T1); 1013 1014 Fp2::add(t2, a, b); 1015 Fp2::add(t3, e, d); 1016 Fp2Dbl T2; 1017 Fp2Dbl::mulPre(T2, t2, t3); 1018 Fp2Dbl::sub(T2, T2, za); 1019 Fp2Dbl::sub(T2, T2, BE); 1020 1021 Fp2::add(t3, a, c); 1022 Fp2::add(t4, d, f); 1023 Fp2Dbl::mulPre(zc, t3, t4); 1024 Fp2Dbl::sub(zc, zc, za); 1025 Fp2Dbl::sub(zc, zc, zb); 1026 1027 Fp2Dbl::add(za, za, T1); 1028 Fp2Dbl::mul_xi(zb, zb); 1029 Fp2Dbl::add(zb, zb, T2); 1030 Fp2Dbl::add(zc, zc, BE); 1031 //clk.end(); 1032 } 1033 static void mod(Fp6& y, const Fp6Dbl& x) 1034 { 1035 Fp2Dbl::mod(y.a, x.a); 1036 Fp2Dbl::mod(y.b, x.b); 1037 Fp2Dbl::mod(y.c, x.c); 1038 } 1039 }; 1040 1041 template<class Fp> 1042 inline void Fp6T<Fp>::mul(Fp6T<Fp>& z, const Fp6T<Fp>& x, const Fp6T<Fp>& y) 1043 { 1044 Fp6DblT<Fp> Z; 1045 Fp6DblT<Fp>::mulPre(Z, x, y); 1046 Fp6DblT<Fp>::mod(z, Z); 1047 } 1048 1049 /* 1050 Fp12T = Fp6[w] / (w^2 - v) 1051 x = a + b w 1052 */ 1053 template<class Fp> 1054 struct Fp12T : public fp::Serializable<Fp12T<Fp>, 1055 fp::Operator<Fp12T<Fp> > > { 1056 typedef Fp2T<Fp> Fp2; 1057 typedef Fp6T<Fp> Fp6; 1058 typedef Fp2DblT<Fp> Fp2Dbl; 1059 typedef Fp6DblT<Fp> Fp6Dbl; 1060 typedef Fp BaseFp; 1061 Fp6 a, b; 1062 Fp12T() {} 1063 Fp12T(int64_t a) : a(a), b(0) {} 1064 Fp12T(const Fp6& a, const Fp6& b) : a(a), b(b) {} 1065 void clear() 1066 { 1067 a.clear(); 1068 b.clear(); 1069 } 1070 void setOne() 1071 { 1072 clear(); 1073 a.a.a = 1; 1074 } 1075 1076 Fp* getFp0() { return a.getFp0(); } 1077 const Fp* getFp0() const { return a.getFp0(); } 1078 Fp2* getFp2() { return a.getFp2(); } 1079 const Fp2* getFp2() const { return a.getFp2(); } 1080 void set(const Fp2& v0, const Fp2& v1, const Fp2& v2, const Fp2& v3, const Fp2& v4, const Fp2& v5) 1081 { 1082 a.set(v0, v1, v2); 1083 b.set(v3, v4, v5); 1084 } 1085 1086 bool isZero() const 1087 { 1088 return a.isZero() && b.isZero(); 1089 } 1090 bool isOne() const 1091 { 1092 return a.isOne() && b.isZero(); 1093 } 1094 bool operator==(const Fp12T& rhs) const 1095 { 1096 return a == rhs.a && b == rhs.b; 1097 } 1098 bool operator!=(const Fp12T& rhs) const { return !operator==(rhs); } 1099 static void add(Fp12T& z, const Fp12T& x, const Fp12T& y) 1100 { 1101 Fp6::add(z.a, x.a, y.a); 1102 Fp6::add(z.b, x.b, y.b); 1103 } 1104 static void sub(Fp12T& z, const Fp12T& x, const Fp12T& y) 1105 { 1106 Fp6::sub(z.a, x.a, y.a); 1107 Fp6::sub(z.b, x.b, y.b); 1108 } 1109 static void neg(Fp12T& z, const Fp12T& x) 1110 { 1111 Fp6::neg(z.a, x.a); 1112 Fp6::neg(z.b, x.b); 1113 } 1114 /* 1115 z = x v + y 1116 in Fp6 : (a + bv + cv^2)v = cv^3 + av + bv^2 = cxi + av + bv^2 1117 */ 1118 static void mulVadd(Fp6& z, const Fp6& x, const Fp6& y) 1119 { 1120 Fp2 t; 1121 Fp2::mul_xi(t, x.c); 1122 Fp2::add(z.c, x.b, y.c); 1123 Fp2::add(z.b, x.a, y.b); 1124 Fp2::add(z.a, t, y.a); 1125 } 1126 static void mulVadd(Fp6Dbl& z, const Fp6Dbl& x, const Fp6Dbl& y) 1127 { 1128 Fp2Dbl t; 1129 Fp2Dbl::mul_xi(t, x.c); 1130 Fp2Dbl::add(z.c, x.b, y.c); 1131 Fp2Dbl::add(z.b, x.a, y.b); 1132 Fp2Dbl::add(z.a, t, y.a); 1133 } 1134 /* 1135 x = a + bw, y = c + dw, w^2 = v 1136 z = xy = (a + bw)(c + dw) = (ac + bdv) + (ad + bc)w 1137 ad+bc = (a + b)(c + d) - ac - bd 1138 1139 in Fp6 : (a + bv + cv^2)v = cv^3 + av + bv^2 = cxi + av + bv^2 1140 */ 1141 static void mul(Fp12T& z, const Fp12T& x, const Fp12T& y) 1142 { 1143 // 4.7Kclk -> 4.55Kclk 1144 const Fp6& a = x.a; 1145 const Fp6& b = x.b; 1146 const Fp6& c = y.a; 1147 const Fp6& d = y.b; 1148 Fp6 t1, t2; 1149 Fp6::add(t1, a, b); 1150 Fp6::add(t2, c, d); 1151 #if 1 1152 Fp6Dbl T, AC, BD; 1153 Fp6Dbl::mulPre(AC, a, c); 1154 Fp6Dbl::mulPre(BD, b, d); 1155 mulVadd(T, BD, AC); 1156 Fp6Dbl::mod(z.a, T); 1157 Fp6Dbl::mulPre(T, t1, t2); // (a + b)(c + d) 1158 Fp6Dbl::sub(T, T, AC); 1159 Fp6Dbl::sub(T, T, BD); 1160 Fp6Dbl::mod(z.b, T); 1161 #else 1162 Fp6 ac, bd; 1163 t1 *= t2; // (a + b)(c + d) 1164 Fp6::mul(ac, a, c); 1165 Fp6::mul(bd, b, d); 1166 mulVadd(z.a, bd, ac); 1167 t1 -= ac; 1168 Fp6::sub(z.b, t1, bd); 1169 #endif 1170 } 1171 /* 1172 x = a + bw, w^2 = v 1173 y = x^2 = (a + bw)^2 = (a^2 + b^2v) + 2abw 1174 a^2 + b^2v = (a + b)(bv + a) - (abv + ab) 1175 */ 1176 static void sqr(Fp12T& y, const Fp12T& x) 1177 { 1178 const Fp6& a = x.a; 1179 const Fp6& b = x.b; 1180 Fp6 t0, t1; 1181 Fp6::add(t0, a, b); // a + b 1182 mulVadd(t1, b, a); // bv + a 1183 t0 *= t1; // (a + b)(bv + a) 1184 Fp6::mul(t1, a, b); // ab 1185 Fp6::add(y.b, t1, t1); // 2ab 1186 mulVadd(y.a, t1, t1); // abv + ab 1187 Fp6::sub(y.a, t0, y.a); 1188 } 1189 /* 1190 x = a + bw, w^2 = v 1191 y = 1/x = (a - bw) / (a^2 - b^2v) 1192 */ 1193 static void inv(Fp12T& y, const Fp12T& x) 1194 { 1195 const Fp6& a = x.a; 1196 const Fp6& b = x.b; 1197 Fp6 t0, t1; 1198 Fp6::sqr(t0, a); 1199 Fp6::sqr(t1, b); 1200 Fp2::mul_xi(t1.c, t1.c); 1201 t0.a -= t1.c; 1202 t0.b -= t1.a; 1203 t0.c -= t1.b; // t0 = a^2 - b^2v 1204 Fp6::inv(t0, t0); 1205 Fp6::mul(y.a, x.a, t0); 1206 Fp6::mul(y.b, x.b, t0); 1207 Fp6::neg(y.b, y.b); 1208 } 1209 /* 1210 y = 1 / x = conjugate of x if |x| = 1 1211 */ 1212 static void unitaryInv(Fp12T& y, const Fp12T& x) 1213 { 1214 if (&y != &x) y.a = x.a; 1215 Fp6::neg(y.b, x.b); 1216 } 1217 /* 1218 Frobenius 1219 i^2 = -1 1220 (a + bi)^p = a + bi^p in Fp 1221 = a + bi if p = 1 mod 4 1222 = a - bi if p = 3 mod 4 1223 1224 g = xi^(p - 1) / 6 1225 v^3 = xi in Fp2 1226 v^p = ((v^6) ^ (p-1)/6) v = g^2 v 1227 v^2p = g^4 v^2 1228 (a + bv + cv^2)^p in Fp6 1229 = F(a) + F(b)g^2 v + F(c) g^4 v^2 1230 1231 w^p = ((w^6) ^ (p-1)/6) w = g w 1232 ((a + bv + cv^2)w)^p in Fp12T 1233 = (F(a) g + F(b) g^3 v + F(c) g^5 v^2)w 1234 */ 1235 static void Frobenius(Fp12T& y, const Fp12T& x) 1236 { 1237 for (int i = 0; i < 6; i++) { 1238 Fp2::Frobenius(y.getFp2()[i], x.getFp2()[i]); 1239 } 1240 for (int i = 1; i < 6; i++) { 1241 y.getFp2()[i] *= Fp2::get_gTbl()[i - 1]; 1242 } 1243 } 1244 static void Frobenius2(Fp12T& y, const Fp12T& x) 1245 { 1246 #if 0 1247 Frobenius(y, x); 1248 Frobenius(y, y); 1249 #else 1250 y.getFp2()[0] = x.getFp2()[0]; 1251 if (Fp::getOp().pmod4 == 1) { 1252 for (int i = 1; i < 6; i++) { 1253 Fp2::mul(y.getFp2()[i], x.getFp2()[i], Fp2::get_g2Tbl()[i]); 1254 } 1255 } else { 1256 for (int i = 1; i < 6; i++) { 1257 Fp2::mulFp(y.getFp2()[i], x.getFp2()[i], Fp2::get_g2Tbl()[i - 1].a); 1258 } 1259 } 1260 #endif 1261 } 1262 static void Frobenius3(Fp12T& y, const Fp12T& x) 1263 { 1264 #if 0 1265 Frobenius(y, x); 1266 Frobenius(y, y); 1267 Frobenius(y, y); 1268 #else 1269 Fp2::Frobenius(y.getFp2()[0], x.getFp2()[0]); 1270 for (int i = 1; i < 6; i++) { 1271 Fp2::Frobenius(y.getFp2()[i], x.getFp2()[i]); 1272 y.getFp2()[i] *= Fp2::get_g3Tbl()[i - 1]; 1273 } 1274 #endif 1275 } 1276 template<class InputStream> 1277 void load(bool *pb, InputStream& is, int ioMode) 1278 { 1279 a.load(pb, is, ioMode); if (!*pb) return; 1280 b.load(pb, is, ioMode); 1281 } 1282 template<class OutputStream> 1283 void save(bool *pb, OutputStream& os, int ioMode) const 1284 { 1285 const char sep = *fp::getIoSeparator(ioMode); 1286 a.save(pb, os, ioMode); if (!*pb) return; 1287 if (sep) { 1288 cybozu::writeChar(pb, os, sep); 1289 if (!*pb) return; 1290 } 1291 b.save(pb, os, ioMode); 1292 } 1293 #ifndef CYBOZU_DONT_USE_EXCEPTION 1294 template<class InputStream> 1295 void load(InputStream& is, int ioMode = IoSerialize) 1296 { 1297 bool b; 1298 load(&b, is, ioMode); 1299 if (!b) throw cybozu::Exception("Fp12T:load"); 1300 } 1301 template<class OutputStream> 1302 void save(OutputStream& os, int ioMode = IoSerialize) const 1303 { 1304 bool b; 1305 save(&b, os, ioMode); 1306 if (!b) throw cybozu::Exception("Fp12T:save"); 1307 } 1308 #endif 1309 #ifndef CYBOZU_DONT_USE_STRING 1310 friend std::istream& operator>>(std::istream& is, Fp12T& self) 1311 { 1312 self.load(is, fp::detectIoMode(Fp::BaseFp::getIoMode(), is)); 1313 return is; 1314 } 1315 friend std::ostream& operator<<(std::ostream& os, const Fp12T& self) 1316 { 1317 self.save(os, fp::detectIoMode(Fp::BaseFp::getIoMode(), os)); 1318 return os; 1319 } 1320 #endif 1321 }; 1322 1323 /* 1324 convert multiplicative group to additive group 1325 */ 1326 template<class T> 1327 struct GroupMtoA : public T { 1328 static T& castT(GroupMtoA& x) { return static_cast<T&>(x); } 1329 static const T& castT(const GroupMtoA& x) { return static_cast<const T&>(x); } 1330 void clear() 1331 { 1332 castT(*this) = 1; 1333 } 1334 bool isZero() const { return castT(*this).isOne(); } 1335 static void add(GroupMtoA& z, const GroupMtoA& x, const GroupMtoA& y) 1336 { 1337 T::mul(castT(z), castT(x), castT(y)); 1338 } 1339 static void dbl(GroupMtoA& y, const GroupMtoA& x) 1340 { 1341 T::sqr(castT(y), castT(x)); 1342 } 1343 static void neg(GroupMtoA& y, const GroupMtoA& x) 1344 { 1345 // assume Fp12 1346 T::unitaryInv(castT(y), castT(x)); 1347 } 1348 static void Frobenus(GroupMtoA& y, const GroupMtoA& x) 1349 { 1350 T::Frobenius(castT(y), castT(x)); 1351 } 1352 template<class INT> 1353 static void mul(GroupMtoA& z, const GroupMtoA& x, const INT& y) 1354 { 1355 T::pow(castT(z), castT(x), y); 1356 } 1357 template<class INT> 1358 static void mulGeneric(GroupMtoA& z, const GroupMtoA& x, const INT& y) 1359 { 1360 T::powGeneric(castT(z), castT(x), y); 1361 } 1362 void operator+=(const GroupMtoA& rhs) 1363 { 1364 add(*this, *this, rhs); 1365 } 1366 void normalize() {} 1367 private: 1368 bool isOne() const; 1369 }; 1370 1371 } // mcl 1372