github.com/platonnetwork/platon-go@v0.7.6/cases/tool/win/bls_win/include/mcl/bn.hpp (about)

     1  #pragma once
     2  /**
     3  	@file
     4  	@brief optimal ate pairing over BN-curve / BLS12-curve
     5  	@author MITSUNARI Shigeo(@herumi)
     6  	@license modified new BSD license
     7  	http://opensource.org/licenses/BSD-3-Clause
     8  */
     9  #include <mcl/fp_tower.hpp>
    10  #include <mcl/ec.hpp>
    11  #include <mcl/curve_type.h>
    12  #include <assert.h>
    13  #ifndef CYBOZU_DONT_USE_EXCEPTION
    14  #include <vector>
    15  #endif
    16  
    17  /*
    18  	set bit size of Fp and Fr
    19  */
    20  #ifndef MCL_MAX_FP_BIT_SIZE
    21  	#define MCL_MAX_FP_BIT_SIZE 256
    22  #endif
    23  
    24  #ifndef MCL_MAX_FR_BIT_SIZE
    25  	#define MCL_MAX_FR_BIT_SIZE MCL_MAX_FP_BIT_SIZE
    26  #endif
    27  namespace mcl {
    28  
    29  struct CurveParam {
    30  	/*
    31  		y^2 = x^3 + b
    32  		i^2 = -1
    33  		xi = xi_a + i
    34  		v^3 = xi
    35  		w^2 = v
    36  	*/
    37  	const char *z;
    38  	int b; // y^2 = x^3 + b
    39  	int xi_a; // xi = xi_a + i
    40  	/*
    41  		BN254, BN381 : Dtype
    42  		BLS12-381 : Mtype
    43  	*/
    44  	bool isMtype;
    45  	int curveType; // same in curve_type.h
    46  	bool operator==(const CurveParam& rhs) const
    47  	{
    48  		return strcmp(z, rhs.z) == 0 && b == rhs.b && xi_a == rhs.xi_a && isMtype == rhs.isMtype;
    49  	}
    50  	bool operator!=(const CurveParam& rhs) const { return !operator==(rhs); }
    51  };
    52  
    53  const CurveParam BN254 = { "-0x4080000000000001", 2, 1, false, MCL_BN254 }; // -(2^62 + 2^55 + 1)
    54  // provisional(experimental) param with maxBitSize = 384
    55  const CurveParam BN381_1 = { "-0x400011000000000000000001", 2, 1, false, MCL_BN381_1 }; // -(2^94 + 2^76 + 2^72 + 1) // A Family of Implementation-Friendly BN Elliptic Curves
    56  const CurveParam BN381_2 = { "-0x400040090001000000000001", 2, 1, false, MCL_BN381_2 }; // -(2^94 + 2^78 + 2^67 + 2^64 + 2^48 + 1) // used in relic-toolkit
    57  const CurveParam BN462 = { "0x4001fffffffffffffffffffffbfff", 5, 2, false, MCL_BN462 }; // 2^114 + 2^101 - 2^14 - 1 // https://eprint.iacr.org/2017/334
    58  const CurveParam BN_SNARK1 = { "4965661367192848881", 3, 9, false, MCL_BN_SNARK1 };
    59  const CurveParam BLS12_381 = { "-0xd201000000010000", 4, 1, true, MCL_BLS12_381 };
    60  const CurveParam BN160 = { "0x4000000031", 3, 4, false, MCL_BN160 };
    61  
    62  inline const CurveParam& getCurveParam(int type)
    63  {
    64  	switch (type) {
    65  	case MCL_BN254: return mcl::BN254;
    66  	case MCL_BN381_1: return mcl::BN381_1;
    67  	case MCL_BN381_2: return mcl::BN381_2;
    68  	case MCL_BN462: return mcl::BN462;
    69  	case MCL_BN_SNARK1: return mcl::BN_SNARK1;
    70  	case MCL_BLS12_381: return mcl::BLS12_381;
    71  	case MCL_BN160: return mcl::BN160;
    72  	default:
    73  		assert(0);
    74  		return mcl::BN254;
    75  	}
    76  }
    77  
    78  namespace bn {
    79  
    80  namespace local {
    81  struct FpTag;
    82  struct FrTag;
    83  }
    84  
    85  typedef mcl::FpT<local::FpTag, MCL_MAX_FP_BIT_SIZE> Fp;
    86  typedef mcl::FpT<local::FrTag, MCL_MAX_FR_BIT_SIZE> Fr;
    87  typedef mcl::Fp2T<Fp> Fp2;
    88  typedef mcl::Fp6T<Fp> Fp6;
    89  typedef mcl::Fp12T<Fp> Fp12;
    90  typedef mcl::EcT<Fp> G1;
    91  typedef mcl::EcT<Fp2> G2;
    92  typedef Fp12 GT;
    93  
    94  typedef mcl::FpDblT<Fp> FpDbl;
    95  typedef mcl::Fp2DblT<Fp> Fp2Dbl;
    96  
    97  inline void Frobenius(Fp2& y, const Fp2& x)
    98  {
    99  	Fp2::Frobenius(y, x);
   100  }
   101  inline void Frobenius(Fp12& y, const Fp12& x)
   102  {
   103  	Fp12::Frobenius(y, x);
   104  }
   105  /*
   106  	twisted Frobenius for G2
   107  */
   108  void Frobenius(G2& D, const G2& S);
   109  void Frobenius2(G2& D, const G2& S);
   110  void Frobenius3(G2& D, const G2& S);
   111  
   112  namespace local {
   113  
   114  typedef mcl::FixedArray<int8_t, 128> SignVec;
   115  
   116  inline size_t getPrecomputeQcoeffSize(const SignVec& sv)
   117  {
   118  	size_t idx = 2 + 2;
   119  	for (size_t i = 2; i < sv.size(); i++) {
   120  		idx++;
   121  		if (sv[i]) idx++;
   122  	}
   123  	return idx;
   124  }
   125  
   126  template<class X, class C, size_t N>
   127  X evalPoly(const X& x, const C (&c)[N])
   128  {
   129  	X ret = c[N - 1];
   130  	for (size_t i = 1; i < N; i++) {
   131  		ret *= x;
   132  		ret += c[N - 1 - i];
   133  	}
   134  	return ret;
   135  }
   136  
   137  enum TwistBtype {
   138  	tb_generic,
   139  	tb_1m1i, // 1 - 1i
   140  	tb_1m2i // 1 - 2i
   141  };
   142  
   143  /*
   144  	l = (a, b, c) => (a, b * P.y, c * P.x)
   145  */
   146  inline void updateLine(Fp6& l, const G1& P)
   147  {
   148  	l.b.a *= P.y;
   149  	l.b.b *= P.y;
   150  	l.c.a *= P.x;
   151  	l.c.b *= P.x;
   152  }
   153  
   154  struct Compress {
   155  	Fp12& z_;
   156  	Fp2& g1_;
   157  	Fp2& g2_;
   158  	Fp2& g3_;
   159  	Fp2& g4_;
   160  	Fp2& g5_;
   161  	// z is output area
   162  	Compress(Fp12& z, const Fp12& x)
   163  		: z_(z)
   164  		, g1_(z.getFp2()[4])
   165  		, g2_(z.getFp2()[3])
   166  		, g3_(z.getFp2()[2])
   167  		, g4_(z.getFp2()[1])
   168  		, g5_(z.getFp2()[5])
   169  	{
   170  		g2_ = x.getFp2()[3];
   171  		g3_ = x.getFp2()[2];
   172  		g4_ = x.getFp2()[1];
   173  		g5_ = x.getFp2()[5];
   174  	}
   175  	Compress(Fp12& z, const Compress& c)
   176  		: z_(z)
   177  		, g1_(z.getFp2()[4])
   178  		, g2_(z.getFp2()[3])
   179  		, g3_(z.getFp2()[2])
   180  		, g4_(z.getFp2()[1])
   181  		, g5_(z.getFp2()[5])
   182  	{
   183  		g2_ = c.g2_;
   184  		g3_ = c.g3_;
   185  		g4_ = c.g4_;
   186  		g5_ = c.g5_;
   187  	}
   188  	void decompressBeforeInv(Fp2& nume, Fp2& denomi) const
   189  	{
   190  		assert(&nume != &denomi);
   191  
   192  		if (g2_.isZero()) {
   193  			Fp2::add(nume, g4_, g4_);
   194  			nume *= g5_;
   195  			denomi = g3_;
   196  		} else {
   197  			Fp2 t;
   198  			Fp2::sqr(nume, g5_);
   199  			Fp2::mul_xi(denomi, nume);
   200  			Fp2::sqr(nume, g4_);
   201  			Fp2::sub(t, nume, g3_);
   202  			t += t;
   203  			t += nume;
   204  			Fp2::add(nume, denomi, t);
   205  			Fp2::divBy4(nume, nume);
   206  			denomi = g2_;
   207  		}
   208  	}
   209  
   210  	// output to z
   211  	void decompressAfterInv()
   212  	{
   213  		Fp2& g0 = z_.getFp2()[0];
   214  		Fp2 t0, t1;
   215  		// Compute g0.
   216  		Fp2::sqr(t0, g1_);
   217  		Fp2::mul(t1, g3_, g4_);
   218  		t0 -= t1;
   219  		t0 += t0;
   220  		t0 -= t1;
   221  		Fp2::mul(t1, g2_, g5_);
   222  		t0 += t1;
   223  		Fp2::mul_xi(g0, t0);
   224  		g0.a += Fp::one();
   225  	}
   226  
   227  public:
   228  	void decompress() // for test
   229  	{
   230  		Fp2 nume, denomi;
   231  		decompressBeforeInv(nume, denomi);
   232  		Fp2::inv(denomi, denomi);
   233  		g1_ = nume * denomi; // g1 is recoverd.
   234  		decompressAfterInv();
   235  	}
   236  	/*
   237  		2275clk * 186 = 423Kclk QQQ
   238  	*/
   239  	static void squareC(Compress& z)
   240  	{
   241  		Fp2 t0, t1, t2;
   242  		Fp2Dbl T0, T1, T2, T3;
   243  		Fp2Dbl::sqrPre(T0, z.g4_);
   244  		Fp2Dbl::sqrPre(T1, z.g5_);
   245  		Fp2Dbl::mul_xi(T2, T1);
   246  		T2 += T0;
   247  		Fp2Dbl::mod(t2, T2);
   248  		Fp2::add(t0, z.g4_, z.g5_);
   249  		Fp2Dbl::sqrPre(T2, t0);
   250  		T0 += T1;
   251  		T2 -= T0;
   252  		Fp2Dbl::mod(t0, T2);
   253  		Fp2::add(t1, z.g2_, z.g3_);
   254  		Fp2Dbl::sqrPre(T3, t1);
   255  		Fp2Dbl::sqrPre(T2, z.g2_);
   256  		Fp2::mul_xi(t1, t0);
   257  		z.g2_ += t1;
   258  		z.g2_ += z.g2_;
   259  		z.g2_ += t1;
   260  		Fp2::sub(t1, t2, z.g3_);
   261  		t1 += t1;
   262  		Fp2Dbl::sqrPre(T1, z.g3_);
   263  		Fp2::add(z.g3_, t1, t2);
   264  		Fp2Dbl::mul_xi(T0, T1);
   265  		T0 += T2;
   266  		Fp2Dbl::mod(t0, T0);
   267  		Fp2::sub(z.g4_, t0, z.g4_);
   268  		z.g4_ += z.g4_;
   269  		z.g4_ += t0;
   270  		Fp2Dbl::addPre(T2, T2, T1);
   271  		T3 -= T2;
   272  		Fp2Dbl::mod(t0, T3);
   273  		z.g5_ += t0;
   274  		z.g5_ += z.g5_;
   275  		z.g5_ += t0;
   276  	}
   277  	static void square_n(Compress& z, int n)
   278  	{
   279  		for (int i = 0; i < n; i++) {
   280  			squareC(z);
   281  		}
   282  	}
   283  	/*
   284  		Exponentiation over compression for:
   285  		z = x^Param::z.abs()
   286  	*/
   287  	static void fixed_power(Fp12& z, const Fp12& x)
   288  	{
   289  		if (x.isOne()) {
   290  			z = 1;
   291  			return;
   292  		}
   293  		Fp12 x_org = x;
   294  		Fp12 d62;
   295  		Fp2 c55nume, c55denomi, c62nume, c62denomi;
   296  		Compress c55(z, x);
   297  		square_n(c55, 55);
   298  		c55.decompressBeforeInv(c55nume, c55denomi);
   299  		Compress c62(d62, c55);
   300  		square_n(c62, 62 - 55);
   301  		c62.decompressBeforeInv(c62nume, c62denomi);
   302  		Fp2 acc;
   303  		Fp2::mul(acc, c55denomi, c62denomi);
   304  		Fp2::inv(acc, acc);
   305  		Fp2 t;
   306  		Fp2::mul(t, acc, c62denomi);
   307  		Fp2::mul(c55.g1_, c55nume, t);
   308  		c55.decompressAfterInv();
   309  		Fp2::mul(t, acc, c55denomi);
   310  		Fp2::mul(c62.g1_, c62nume, t);
   311  		c62.decompressAfterInv();
   312  		z *= x_org;
   313  		z *= d62;
   314  	}
   315  };
   316  
   317  struct MapTo {
   318  	enum {
   319  		BNtype,
   320  		BLS12type,
   321  		STD_ECtype
   322  	};
   323  	Fp c1_; // sqrt(-3)
   324  	Fp c2_; // (-1 + sqrt(-3)) / 2
   325  	mpz_class z_;
   326  	mpz_class cofactor_;
   327  	int type_;
   328  	bool useNaiveMapTo_;
   329  
   330  	int legendre(bool *pb, const Fp& x) const
   331  	{
   332  		mpz_class xx;
   333  		x.getMpz(pb, xx);
   334  		if (!*pb) return 0;
   335  		return gmp::legendre(xx, Fp::getOp().mp);
   336  	}
   337  	int legendre(bool *pb, const Fp2& x) const
   338  	{
   339  		Fp y;
   340  		Fp2::norm(y, x);
   341  		return legendre(pb, y);
   342  	}
   343  	void mulFp(Fp& x, const Fp& y) const
   344  	{
   345  		x *= y;
   346  	}
   347  	void mulFp(Fp2& x, const Fp& y) const
   348  	{
   349  		x.a *= y;
   350  		x.b *= y;
   351  	}
   352  	/*
   353  		P.-A. Fouque and M. Tibouchi,
   354  		"Indifferentiable hashing to Barreto Naehrig curves,"
   355  		in Proc. Int. Conf. Cryptol. Inform. Security Latin Amer., 2012, vol. 7533, pp.1-17.
   356  
   357  		w = sqrt(-3) t / (1 + b + t^2)
   358  		Remark: throw exception if t = 0, c1, -c1 and b = 2
   359  	*/
   360  	template<class G, class F>
   361  	bool calcBN(G& P, const F& t) const
   362  	{
   363  		F x, y, w;
   364  		bool b;
   365  		bool negative = legendre(&b, t) < 0;
   366  		if (!b) return false;
   367  		if (t.isZero()) return false;
   368  		F::sqr(w, t);
   369  		w += G::b_;
   370  		*w.getFp0() += Fp::one();
   371  		if (w.isZero()) return false;
   372  		F::inv(w, w);
   373  		mulFp(w, c1_);
   374  		w *= t;
   375  		for (int i = 0; i < 3; i++) {
   376  			switch (i) {
   377  			case 0: F::mul(x, t, w); F::neg(x, x); *x.getFp0() += c2_; break;
   378  			case 1: F::neg(x, x); *x.getFp0() -= Fp::one(); break;
   379  			case 2: F::sqr(x, w); F::inv(x, x); *x.getFp0() += Fp::one(); break;
   380  			}
   381  			G::getWeierstrass(y, x);
   382  			if (F::squareRoot(y, y)) {
   383  				if (negative) F::neg(y, y);
   384  				P.set(&b, x, y, false);
   385  				assert(b);
   386  				return true;
   387  			}
   388  		}
   389  		return false;
   390  	}
   391  	/*
   392  		Faster Hashing to G2
   393  		Laura Fuentes-Castaneda, Edward Knapp, Francisco Rodriguez-Henriquez
   394  		section 6.1
   395  		for BN
   396  		Q = zP + Frob(3zP) + Frob^2(zP) + Frob^3(P)
   397  		  = -(18x^3 + 12x^2 + 3x + 1)cofactor_ P
   398  	*/
   399  	void mulByCofactorBN(G2& Q, const G2& P) const
   400  	{
   401  #if 0
   402  		G2::mulGeneric(Q, P, cofactor_);
   403  #else
   404  #if 0
   405  		mpz_class t = -(1 + z_ * (3 + z_ * (12 + z_ * 18)));
   406  		G2::mulGeneric(Q, P, t * cofactor_);
   407  #else
   408  		G2 T0, T1, T2;
   409  		/*
   410  			G2::mul (GLV method) can't be used because P is not on G2
   411  		*/
   412  		G2::mulGeneric(T0, P, z_);
   413  		G2::dbl(T1, T0);
   414  		T1 += T0; // 3zP
   415  		Frobenius(T1, T1);
   416  		Frobenius2(T2, T0);
   417  		T0 += T1;
   418  		T0 += T2;
   419  		Frobenius3(T2, P);
   420  		G2::add(Q, T0, T2);
   421  #endif
   422  #endif
   423  	}
   424  	/*
   425  		1.2~1.4 times faster than calBN
   426  	*/
   427  	template<class G, class F>
   428  	void naiveMapTo(G& P, const F& t) const
   429  	{
   430  		F x = t;
   431  		for (;;) {
   432  			F y;
   433  			G::getWeierstrass(y, x);
   434  			if (F::squareRoot(y, y)) {
   435  				bool b;
   436  				P.set(&b, x, y, false);
   437  				assert(b);
   438  				return;
   439  			}
   440  			*x.getFp0() += Fp::one();
   441  		}
   442  	}
   443  	/*
   444  		#(Fp) / r = (z + 1 - t) / r = (z - 1)^2 / 3
   445  	*/
   446  	void mulByCofactorBLS12(G1& Q, const G1& P) const
   447  	{
   448  		G1::mulGeneric(Q, P, cofactor_);
   449  	}
   450  	/*
   451  		Efficient hash maps to G2 on BLS curves
   452  		Alessandro Budroni, Federico Pintore
   453  		Q = (z(z-1)-1)P + Frob((z-1)P) + Frob^2(2P)
   454  	*/
   455  	void mulByCofactorBLS12(G2& Q, const G2& P) const
   456  	{
   457  		G2 T0, T1;
   458  		G2::mulGeneric(T0, P, z_ - 1);
   459  		G2::mulGeneric(T1, T0, z_);
   460  		T1 -= P;
   461  		Frobenius(T0, T0);
   462  		T0 += T1;
   463  		G2::dbl(T1, P);
   464  		Frobenius2(T1, T1);
   465  		G2::add(Q, T0, T1);
   466  	}
   467  	/*
   468  		cofactor_ is for G2(not used now)
   469  	*/
   470  	void initBN(const mpz_class& cofactor, const mpz_class &z, int curveType)
   471  	{
   472  		z_ = z;
   473  		cofactor_ = cofactor;
   474  		if (curveType == MCL_BN254) {
   475  			const char *c1 = "252364824000000126cd890000000003cf0f0000000000060c00000000000004";
   476  			const char *c2 = "25236482400000017080eb4000000006181800000000000cd98000000000000b";
   477  			bool b;
   478  			c1_.setStr(&b, c1, 16);
   479  			c2_.setStr(&b, c2, 16);
   480  			(void)b;
   481  			return;
   482  		}
   483  		bool b = Fp::squareRoot(c1_, -3);
   484  		assert(b);
   485  		(void)b;
   486  		c2_ = (c1_ - 1) / 2;
   487  	}
   488  	void initBLS12(const mpz_class& z)
   489  	{
   490  		z_ = z;
   491  		// cofactor for G1
   492  		cofactor_ = (z - 1) * (z - 1) / 3;
   493  		bool b = Fp::squareRoot(c1_, -3);
   494  		assert(b);
   495  		(void)b;
   496  		c2_ = (c1_ - 1) / 2;
   497  	}
   498  	/*
   499  		if type == STD_ECtype, then cofactor, z are not used.
   500  	*/
   501  	void init(const mpz_class& cofactor, const mpz_class &z, int curveType)
   502  	{
   503  		if (0 <= curveType && curveType < MCL_EC_BEGIN) {
   504  			type_ = curveType == MCL_BLS12_381 ? BLS12type : BNtype;
   505  		} else {
   506  			type_ = STD_ECtype;
   507  		}
   508  		if (type_ == STD_ECtype) {
   509  			useNaiveMapTo_ = true;
   510  		} else {
   511  			useNaiveMapTo_ = false;
   512  		}
   513  #ifdef MCL_USE_OLD_MAPTO_FOR_BLS12
   514  		if (type == BLS12type) useNaiveMapTo_ = true;
   515  #endif
   516  		if (type_ == BNtype) {
   517  			initBN(cofactor, z, curveType);
   518  		} else if (type_ == BLS12type) {
   519  			initBLS12(z);
   520  		}
   521  	}
   522  	bool calcG1(G1& P, const Fp& t) const
   523  	{
   524  		if (useNaiveMapTo_) {
   525  			naiveMapTo<G1, Fp>(P, t);
   526  		} else {
   527  			if (!calcBN<G1, Fp>(P, t)) return false;
   528  		}
   529  		switch (type_) {
   530  		case BNtype:
   531  			// no subgroup
   532  			break;
   533  		case BLS12type:
   534  			mulByCofactorBLS12(P, P);
   535  			break;
   536  		}
   537  		assert(P.isValid());
   538  		return true;
   539  	}
   540  	/*
   541  		get the element in G2 by multiplying the cofactor
   542  	*/
   543  	bool calcG2(G2& P, const Fp2& t) const
   544  	{
   545  		if (useNaiveMapTo_) {
   546  			naiveMapTo<G2, Fp2>(P, t);
   547  		} else {
   548  			if (!calcBN<G2, Fp2>(P, t)) return false;
   549  		}
   550  		switch(type_) {
   551  		case BNtype:
   552  			mulByCofactorBN(P, P);
   553  			break;
   554  		case BLS12type:
   555  			mulByCofactorBLS12(P, P);
   556  			break;
   557  		}
   558  		assert(P.isValid());
   559  		return true;
   560  	}
   561  };
   562  
   563  /*
   564  	Software implementation of Attribute-Based Encryption: Appendixes
   565  	GLV for G1 on BN/BLS12
   566  */
   567  struct GLV1 {
   568  	Fp rw; // rw = 1 / w = (-1 - sqrt(-3)) / 2
   569  	size_t rBitSize;
   570  	mpz_class v0, v1;
   571  	mpz_class B[2][2];
   572  	mpz_class r;
   573  private:
   574  	bool usePrecomputedTable(int curveType)
   575  	{
   576  		if (curveType < 0) return false;
   577  		const struct Tbl {
   578  			int curveType;
   579  			const char *rw;
   580  			size_t rBitSize;
   581  			const char *v0, *v1;
   582  			const char *B[2][2];
   583  			const char *r;
   584  		} tbl[] = {
   585  			{
   586  				MCL_BN254,
   587  				"49b36240000000024909000000000006cd80000000000007",
   588  				256,
   589  				"2a01fab7e04a017b9c0eb31ff36bf3357",
   590  				"37937ca688a6b4904",
   591  				{
   592  					{
   593  						"61818000000000028500000000000004",
   594  						"8100000000000001",
   595  					},
   596  					{
   597  						"8100000000000001",
   598  						"-61818000000000020400000000000003",
   599  					},
   600  				},
   601  				"2523648240000001ba344d8000000007ff9f800000000010a10000000000000d",
   602  			},
   603  		};
   604  		for (size_t i = 0; i < CYBOZU_NUM_OF_ARRAY(tbl); i++) {
   605  			if (tbl[i].curveType != curveType) continue;
   606  			bool b;
   607  			rw.setStr(&b, tbl[i].rw, 16); if (!b) continue;
   608  			rBitSize = tbl[i].rBitSize;
   609  			mcl::gmp::setStr(&b, v0, tbl[i].v0, 16); if (!b) continue;
   610  			mcl::gmp::setStr(&b, v1, tbl[i].v1, 16); if (!b) continue;
   611  			mcl::gmp::setStr(&b, B[0][0], tbl[i].B[0][0], 16); if (!b) continue;
   612  			mcl::gmp::setStr(&b, B[0][1], tbl[i].B[0][1], 16); if (!b) continue;
   613  			mcl::gmp::setStr(&b, B[1][0], tbl[i].B[1][0], 16); if (!b) continue;
   614  			mcl::gmp::setStr(&b, B[1][1], tbl[i].B[1][1], 16); if (!b) continue;
   615  			mcl::gmp::setStr(&b, r, tbl[i].r, 16); if (!b) continue;
   616  			return true;
   617  		}
   618  		return false;
   619  	}
   620  public:
   621  	bool operator==(const GLV1& rhs) const
   622  	{
   623  		return rw == rhs.rw && rBitSize == rhs.rBitSize && v0 == rhs.v0 && v1 == rhs.v1
   624  			&& B[0][0] == rhs.B[0][0] && B[0][1] == rhs.B[0][1] && B[1][0] == rhs.B[1][0]
   625  			&& B[1][1] == rhs.B[1][1] && r == rhs.r;
   626  	}
   627  	bool operator!=(const GLV1& rhs) const { return !operator==(rhs); }
   628  #ifndef CYBOZU_DONT_USE_STRING
   629  	void dump(const mpz_class& x) const
   630  	{
   631  		printf("\"%s\",\n", mcl::gmp::getStr(x, 16).c_str());
   632  	}
   633  	void dump() const
   634  	{
   635  		printf("\"%s\",\n", rw.getStr(16).c_str());
   636  		printf("%d,\n", (int)rBitSize);
   637  		dump(v0);
   638  		dump(v1);
   639  		dump(B[0][0]); dump(B[0][1]); dump(B[1][0]); dump(B[1][1]);
   640  		dump(r);
   641  	}
   642  #endif
   643  	void init(const mpz_class& r, const mpz_class& z, bool isBLS12 = false, int curveType = -1)
   644  	{
   645  		if (usePrecomputedTable(curveType)) return;
   646  		bool b = Fp::squareRoot(rw, -3);
   647  		assert(b);
   648  		(void)b;
   649  		rw = -(rw + 1) / 2;
   650  		this->r = r;
   651  		rBitSize = gmp::getBitSize(r);
   652  		rBitSize = (rBitSize + fp::UnitBitSize - 1) & ~(fp::UnitBitSize - 1);// a little better size
   653  		if (isBLS12) {
   654  			/*
   655  				BLS12
   656  				L = z^4
   657  				(-z^2+1) + L = 0
   658  				1 + z^2 L = 0
   659  			*/
   660  			B[0][0] = -z * z + 1;
   661  			B[0][1] = 1;
   662  			B[1][0] = 1;
   663  			B[1][1] = z * z;
   664  		} else {
   665  			/*
   666  				BN
   667  				L = 36z^4 - 1
   668  				(6z^2+2z) - (2z+1)   L = 0
   669  				(-2z-1) - (6z^2+4z+1)L = 0
   670  			*/
   671  			B[0][0] = 6 * z * z + 2 * z;
   672  			B[0][1] = -2 * z - 1;
   673  			B[1][0] = -2 * z - 1;
   674  			B[1][1] = -6 * z * z - 4 * z - 1;
   675  		}
   676  		// [v0 v1] = [r 0] * B^(-1)
   677  		v0 = ((-B[1][1]) << rBitSize) / r;
   678  		v1 = ((B[1][0]) << rBitSize) / r;
   679  	}
   680  	/*
   681  		L = lambda = p^4
   682  		L (x, y) = (rw x, y)
   683  	*/
   684  	void mulLambda(G1& Q, const G1& P) const
   685  	{
   686  		Fp::mul(Q.x, P.x, rw);
   687  		Q.y = P.y;
   688  		Q.z = P.z;
   689  	}
   690  	/*
   691  		x = a + b * lambda mod r
   692  	*/
   693  	void split(mpz_class& a, mpz_class& b, const mpz_class& x) const
   694  	{
   695  		mpz_class t;
   696  		t = (x * v0) >> rBitSize;
   697  		b = (x * v1) >> rBitSize;
   698  		a = x - (t * B[0][0] + b * B[1][0]);
   699  		b = - (t * B[0][1] + b * B[1][1]);
   700  	}
   701  	void mul(G1& Q, const G1& P, mpz_class x, bool constTime = false) const
   702  	{
   703  		typedef mcl::fp::Unit Unit;
   704  		const size_t maxUnit = 512 / 2 / mcl::fp::UnitBitSize;
   705  		const int splitN = 2;
   706  		mpz_class u[splitN];
   707  		G1 in[splitN];
   708  		G1 tbl[4];
   709  		int bitTbl[splitN]; // bit size of u[i]
   710  		Unit w[splitN][maxUnit]; // unit array of u[i]
   711  		int maxBit = 0; // max bit of u[i]
   712  		int maxN = 0;
   713  		int remainBit = 0;
   714  
   715  		x %= r;
   716  		if (x == 0) {
   717  			Q.clear();
   718  			if (constTime) goto DummyLoop;
   719  			return;
   720  		}
   721  		if (x < 0) {
   722  			x += r;
   723  		}
   724  		split(u[0], u[1], x);
   725  		in[0] = P;
   726  		mulLambda(in[1], in[0]);
   727  		for (int i = 0; i < splitN; i++) {
   728  			if (u[i] < 0) {
   729  				u[i] = -u[i];
   730  				G1::neg(in[i], in[i]);
   731  			}
   732  			in[i].normalize();
   733  		}
   734  #if 0
   735  		G1::mulGeneric(in[0], in[0], u[0]);
   736  		G1::mulGeneric(in[1], in[1], u[1]);
   737  		G1::add(Q, in[0], in[1]);
   738  		return;
   739  #else
   740  		tbl[0] = in[0]; // dummy
   741  		tbl[1] = in[0];
   742  		tbl[2] = in[1];
   743  		G1::add(tbl[3], in[0], in[1]);
   744  		tbl[3].normalize();
   745  		for (int i = 0; i < splitN; i++) {
   746  			bool b;
   747  			mcl::gmp::getArray(&b, w[i], maxUnit, u[i]);
   748  			assert(b);
   749  			bitTbl[i] = (int)mcl::gmp::getBitSize(u[i]);
   750  			maxBit = fp::max_(maxBit, bitTbl[i]);
   751  		}
   752  		assert(maxBit > 0);
   753  		maxBit--;
   754  		/*
   755  			maxBit = maxN * UnitBitSize + remainBit
   756  			0 < remainBit <= UnitBitSize
   757  		*/
   758  		maxN = maxBit / mcl::fp::UnitBitSize;
   759  		remainBit = maxBit % mcl::fp::UnitBitSize;
   760  		remainBit++;
   761  		Q.clear();
   762  		for (int i = maxN; i >= 0; i--) {
   763  			for (int j = remainBit - 1; j >= 0; j--) {
   764  				G1::dbl(Q, Q);
   765  				uint32_t b0 = (w[0][i] >> j) & 1;
   766  				uint32_t b1 = (w[1][i] >> j) & 1;
   767  				uint32_t c = b1 * 2 + b0;
   768  				if (c == 0) {
   769  					if (constTime) tbl[0] += tbl[1];
   770  				} else {
   771  					Q += tbl[c];
   772  				}
   773  			}
   774  			remainBit = (int)mcl::fp::UnitBitSize;
   775  		}
   776  #endif
   777  	DummyLoop:
   778  		if (!constTime) return;
   779  		const int limitBit = (int)rBitSize / splitN;
   780  		G1 D = tbl[0];
   781  		for (int i = maxBit + 1; i < limitBit; i++) {
   782  			G1::dbl(D, D);
   783  			D += tbl[0];
   784  		}
   785  	}
   786  };
   787  
   788  /*
   789  	GLV method for G2 and GT on BN/BLS12
   790  */
   791  struct GLV2 {
   792  	size_t rBitSize;
   793  	mpz_class B[4][4];
   794  	mpz_class r;
   795  	mpz_class v[4];
   796  	mpz_class z;
   797  	mpz_class abs_z;
   798  	bool isBLS12;
   799  	GLV2() : rBitSize(0), isBLS12(false) {}
   800  	void init(const mpz_class& r, const mpz_class& z, bool isBLS12 = false)
   801  	{
   802  		this->r = r;
   803  		this->z = z;
   804  		this->abs_z = z < 0 ? -z : z;
   805  		this->isBLS12 = isBLS12;
   806  		rBitSize = mcl::gmp::getBitSize(r);
   807  		rBitSize = (rBitSize + mcl::fp::UnitBitSize - 1) & ~(mcl::fp::UnitBitSize - 1);// a little better size
   808  		mpz_class z2p1 = z * 2 + 1;
   809  		B[0][0] = z + 1;
   810  		B[0][1] = z;
   811  		B[0][2] = z;
   812  		B[0][3] = -2 * z;
   813  		B[1][0] = z2p1;
   814  		B[1][1] = -z;
   815  		B[1][2] = -(z + 1);
   816  		B[1][3] = -z;
   817  		B[2][0] = 2 * z;
   818  		B[2][1] = z2p1;
   819  		B[2][2] = z2p1;
   820  		B[2][3] = z2p1;
   821  		B[3][0] = z - 1;
   822  		B[3][1] = 2 * z2p1;
   823  		B[3][2] =  -2 * z + 1;
   824  		B[3][3] = z - 1;
   825  		/*
   826  			v[] = [r 0 0 0] * B^(-1) = [2z^2+3z+1, 12z^3+8z^2+z, 6z^3+4z^2+z, -(2z+1)]
   827  		*/
   828  		const char *zBN254 = "-4080000000000001";
   829  		mpz_class t;
   830  		bool b;
   831  		mcl::gmp::setStr(&b, t, zBN254, 16);
   832  		assert(b);
   833  		(void)b;
   834  		if (z == t) {
   835  			static const char *vTblBN254[] = {
   836  				"e00a8e7f56e007e5b09fe7fdf43ba998",
   837  				"-152aff56a8054abf9da75db2da3d6885101e5fd3997d41cb1",
   838  				"-a957fab5402a55fced3aed96d1eb44295f40f136ee84e09b",
   839  				"-e00a8e7f56e007e929d7b2667ea6f29c",
   840  			};
   841  			for (int i = 0; i < 4; i++) {
   842  				mcl::gmp::setStr(&b, v[i], vTblBN254[i], 16);
   843  				assert(b);
   844  				(void)b;
   845  			}
   846  		} else {
   847  			v[0] = ((1 + z * (3 + z * 2)) << rBitSize) / r;
   848  			v[1] = ((z * (1 + z * (8 + z * 12))) << rBitSize) / r;
   849  			v[2] = ((z * (1 + z * (4 + z * 6))) << rBitSize) / r;
   850  			v[3] = -((z * (1 + z * 2)) << rBitSize) / r;
   851  		}
   852  	}
   853  	/*
   854  		u[] = [x, 0, 0, 0] - v[] * x * B
   855  	*/
   856  	void split(mpz_class u[4], const mpz_class& x) const
   857  	{
   858  		if (isBLS12) {
   859  			/*
   860  				Frob(P) = zP
   861  				x = u[0] + u[1] z + u[2] z^2 + u[3] z^3
   862  			*/
   863  			bool isNeg = false;
   864  			mpz_class t = x;
   865  			if (t < 0) {
   866  				t = -t;
   867  				isNeg = true;
   868  			}
   869  			for (int i = 0; i < 4; i++) {
   870  				// t = t / abs_z, u[i] = t % abs_z
   871  				mcl::gmp::divmod(t, u[i], t, abs_z);
   872  				if (((z < 0) && (i & 1)) ^ isNeg) {
   873  					u[i] = -u[i];
   874  				}
   875  			}
   876  			return;
   877  		}
   878  		// BN
   879  		mpz_class t[4];
   880  		for (int i = 0; i < 4; i++) {
   881  			t[i] = (x * v[i]) >> rBitSize;
   882  		}
   883  		for (int i = 0; i < 4; i++) {
   884  			u[i] = (i == 0) ? x : 0;
   885  			for (int j = 0; j < 4; j++) {
   886  				u[i] -= t[j] * B[j][i];
   887  			}
   888  		}
   889  	}
   890  	template<class T>
   891  	void mul(T& Q, const T& P, mpz_class x, bool constTime = false) const
   892  	{
   893  #if 0 // #ifndef NDEBUG
   894  		{
   895  			T R;
   896  			T::mulGeneric(R, P, r);
   897  			assert(R.isZero());
   898  		}
   899  #endif
   900  		typedef mcl::fp::Unit Unit;
   901  		const size_t maxUnit = 512 / 2 / mcl::fp::UnitBitSize;
   902  		const int splitN = 4;
   903  		mpz_class u[splitN];
   904  		T in[splitN];
   905  		T tbl[16];
   906  		int bitTbl[splitN]; // bit size of u[i]
   907  		Unit w[splitN][maxUnit]; // unit array of u[i]
   908  		int maxBit = 0; // max bit of u[i]
   909  		int maxN = 0;
   910  		int remainBit = 0;
   911  
   912  		x %= r;
   913  		if (x == 0) {
   914  			Q.clear();
   915  			if (constTime) goto DummyLoop;
   916  			return;
   917  		}
   918  		if (x < 0) {
   919  			x += r;
   920  		}
   921  		split(u, x);
   922  		in[0] = P;
   923  		Frobenius(in[1], in[0]);
   924  		Frobenius(in[2], in[1]);
   925  		Frobenius(in[3], in[2]);
   926  		for (int i = 0; i < splitN; i++) {
   927  			if (u[i] < 0) {
   928  				u[i] = -u[i];
   929  				T::neg(in[i], in[i]);
   930  			}
   931  //			in[i].normalize(); // slow
   932  		}
   933  #if 0
   934  		for (int i = 0; i < splitN; i++) {
   935  			T::mulGeneric(in[i], in[i], u[i]);
   936  		}
   937  		T::add(Q, in[0], in[1]);
   938  		Q += in[2];
   939  		Q += in[3];
   940  		return;
   941  #else
   942  		tbl[0] = in[0];
   943  		for (size_t i = 1; i < 16; i++) {
   944  			tbl[i].clear();
   945  			if (i & 1) {
   946  				tbl[i] += in[0];
   947  			}
   948  			if (i & 2) {
   949  				tbl[i] += in[1];
   950  			}
   951  			if (i & 4) {
   952  				tbl[i] += in[2];
   953  			}
   954  			if (i & 8) {
   955  				tbl[i] += in[3];
   956  			}
   957  //			tbl[i].normalize();
   958  		}
   959  		for (int i = 0; i < splitN; i++) {
   960  			bool b;
   961  			mcl::gmp::getArray(&b, w[i], maxUnit, u[i]);
   962  			assert(b);
   963  			bitTbl[i] = (int)mcl::gmp::getBitSize(u[i]);
   964  			maxBit = fp::max_(maxBit, bitTbl[i]);
   965  		}
   966  		maxBit--;
   967  		/*
   968  			maxBit = maxN * UnitBitSize + remainBit
   969  			0 < remainBit <= UnitBitSize
   970  		*/
   971  		maxN = maxBit / mcl::fp::UnitBitSize;
   972  		remainBit = maxBit % mcl::fp::UnitBitSize;
   973  		remainBit++;
   974  		Q.clear();
   975  		for (int i = maxN; i >= 0; i--) {
   976  			for (int j = remainBit - 1; j >= 0; j--) {
   977  				T::dbl(Q, Q);
   978  				uint32_t b0 = (w[0][i] >> j) & 1;
   979  				uint32_t b1 = (w[1][i] >> j) & 1;
   980  				uint32_t b2 = (w[2][i] >> j) & 1;
   981  				uint32_t b3 = (w[3][i] >> j) & 1;
   982  				uint32_t c = b3 * 8 + b2 * 4 + b1 * 2 + b0;
   983  				if (c == 0) {
   984  					if (constTime) tbl[0] += tbl[1];
   985  				} else {
   986  					Q += tbl[c];
   987  				}
   988  			}
   989  			remainBit = (int)mcl::fp::UnitBitSize;
   990  		}
   991  #endif
   992  	DummyLoop:
   993  		if (!constTime) return;
   994  		const int limitBit = (int)rBitSize / splitN;
   995  		T D = tbl[0];
   996  		for (int i = maxBit + 1; i < limitBit; i++) {
   997  			T::dbl(D, D);
   998  			D += tbl[0];
   999  		}
  1000  	}
  1001  	void pow(Fp12& z, const Fp12& x, mpz_class y, bool constTime = false) const
  1002  	{
  1003  		typedef GroupMtoA<Fp12> AG; // as additive group
  1004  		AG& _z = static_cast<AG&>(z);
  1005  		const AG& _x = static_cast<const AG&>(x);
  1006  		mul(_z, _x, y, constTime);
  1007  	}
  1008  };
  1009  
  1010  struct Param {
  1011  	CurveParam cp;
  1012  	mpz_class z;
  1013  	mpz_class abs_z;
  1014  	bool isNegative;
  1015  	bool isBLS12;
  1016  	mpz_class p;
  1017  	mpz_class r;
  1018  	local::MapTo mapTo;
  1019  	local::GLV1 glv1;
  1020  	local::GLV2 glv2;
  1021  	// for G2 Frobenius
  1022  	Fp2 g2;
  1023  	Fp2 g3;
  1024  	/*
  1025  		Dtype twist
  1026  		(x', y') = phi(x, y) = (x/w^2, y/w^3)
  1027  		y^2 = x^3 + b
  1028  		=> (y'w^3)^2 = (x'w^2)^3 + b
  1029  		=> y'^2 = x'^3 + b / w^6 ; w^6 = xi
  1030  		=> y'^2 = x'^3 + twist_b;
  1031  	*/
  1032  	Fp2 twist_b;
  1033  	local::TwistBtype twist_b_type;
  1034  /*
  1035  	mpz_class exp_c0;
  1036  	mpz_class exp_c1;
  1037  	mpz_class exp_c2;
  1038  	mpz_class exp_c3;
  1039  */
  1040  
  1041  	// Loop parameter for the Miller loop part of opt. ate pairing.
  1042  	local::SignVec siTbl;
  1043  	size_t precomputedQcoeffSize;
  1044  	bool useNAF;
  1045  	local::SignVec zReplTbl;
  1046  
  1047  	// for initG1only
  1048  	G1 basePoint;
  1049  
  1050  	void init(bool *pb, const mcl::CurveParam& cp, fp::Mode mode)
  1051  	{
  1052  		this->cp = cp;
  1053  		isBLS12 = cp.curveType == MCL_BLS12_381;
  1054  		gmp::setStr(pb, z, cp.z);
  1055  		if (!*pb) return;
  1056  		isNegative = z < 0;
  1057  		if (isNegative) {
  1058  			abs_z = -z;
  1059  		} else {
  1060  			abs_z = z;
  1061  		}
  1062  		if (isBLS12) {
  1063  			mpz_class z2 = z * z;
  1064  			mpz_class z4 = z2 * z2;
  1065  			r = z4 - z2 + 1;
  1066  			p = z - 1;
  1067  			p = p * p * r / 3 + z;
  1068  		} else {
  1069  			const int pCoff[] = { 1, 6, 24, 36, 36 };
  1070  			const int rCoff[] = { 1, 6, 18, 36, 36 };
  1071  			p = local::evalPoly(z, pCoff);
  1072  			assert((p % 6) == 1);
  1073  			r = local::evalPoly(z, rCoff);
  1074  		}
  1075  		Fr::init(pb, r, mode);
  1076  		if (!*pb) return;
  1077  		Fp::init(pb, cp.xi_a, p, mode);
  1078  		if (!*pb) return;
  1079  		Fp2::init();
  1080  		const Fp2 xi(cp.xi_a, 1);
  1081  		g2 = Fp2::get_gTbl()[0];
  1082  		g3 = Fp2::get_gTbl()[3];
  1083  		if (cp.isMtype) {
  1084  			Fp2::inv(g2, g2);
  1085  			Fp2::inv(g3, g3);
  1086  		}
  1087  		if (cp.isMtype) {
  1088  			twist_b = Fp2(cp.b) * xi;
  1089  		} else {
  1090  			if (cp.b == 2 && cp.xi_a == 1) {
  1091  				twist_b = Fp2(1, -1); // shortcut
  1092  			} else {
  1093  				twist_b = Fp2(cp.b) / xi;
  1094  			}
  1095  		}
  1096  		if (twist_b == Fp2(1, -1)) {
  1097  			twist_b_type = tb_1m1i;
  1098  		} else if (twist_b == Fp2(1, -2)) {
  1099  			twist_b_type = tb_1m2i;
  1100  		} else {
  1101  			twist_b_type = tb_generic;
  1102  		}
  1103  		G1::init(0, cp.b, mcl::ec::Jacobi);
  1104  		if (isBLS12) {
  1105  			G1::setOrder(r);
  1106  		}
  1107  		G2::init(0, twist_b, mcl::ec::Jacobi);
  1108  		G2::setOrder(r);
  1109  
  1110  		const mpz_class largest_c = isBLS12 ? abs_z : gmp::abs(z * 6 + 2);
  1111  		useNAF = gmp::getNAF(siTbl, largest_c);
  1112  		precomputedQcoeffSize = local::getPrecomputeQcoeffSize(siTbl);
  1113  		gmp::getNAF(zReplTbl, gmp::abs(z));
  1114  /*
  1115  		if (isBLS12) {
  1116  			mpz_class z2 = z * z;
  1117  			mpz_class z3 = z2 * z;
  1118  			mpz_class z4 = z3 * z;
  1119  			mpz_class z5 = z4 * z;
  1120  			exp_c0 = z5 - 2 * z4 + 2 * z2 - z + 3;
  1121  			exp_c1 = z4 - 2 * z3 + 2 * z - 1;
  1122  			exp_c2 = z3 - 2 * z2 + z;
  1123  			exp_c3 = z2 - 2 * z + 1;
  1124  		} else {
  1125  			exp_c0 = -2 + z * (-18 + z * (-30 - 36 * z));
  1126  			exp_c1 = 1 + z * (-12 + z * (-18 - 36 * z));
  1127  			exp_c2 = 6 * z * z + 1;
  1128  		}
  1129  */
  1130  		if (isBLS12) {
  1131  			mapTo.init(0, z, cp.curveType);
  1132  		} else {
  1133  			mapTo.init(2 * p - r, z, cp.curveType);
  1134  		}
  1135  		glv1.init(r, z, isBLS12, cp.curveType);
  1136  		glv2.init(r, z, isBLS12);
  1137  		basePoint.clear();
  1138  		*pb = true;
  1139  	}
  1140  	void initG1only(bool *pb, const mcl::EcParam& para)
  1141  	{
  1142  		Fp::init(pb, para.p);
  1143  		if (!*pb) return;
  1144  		Fr::init(pb, para.n);
  1145  		if (!*pb) return;
  1146  		G1::init(pb, para.a, para.b);
  1147  		if (!*pb) return;
  1148  		G1::setOrder(Fr::getOp().mp);
  1149  		mapTo.init(0, 0, para.curveType);
  1150  		Fp x0, y0;
  1151  		x0.setStr(pb, para.gx);
  1152  		if (!*pb) return;
  1153  		y0.setStr(pb, para.gy);
  1154  		basePoint.set(pb, x0, y0);
  1155  	}
  1156  #ifndef CYBOZU_DONT_USE_EXCEPTION
  1157  	void init(const mcl::CurveParam& cp, fp::Mode mode)
  1158  	{
  1159  		bool b;
  1160  		init(&b, cp, mode);
  1161  		if (!b) throw cybozu::Exception("Param:init");
  1162  	}
  1163  #endif
  1164  };
  1165  
  1166  template<size_t dummyImpl = 0>
  1167  struct StaticVar {
  1168  	static local::Param param;
  1169  };
  1170  
  1171  template<size_t dummyImpl>
  1172  local::Param StaticVar<dummyImpl>::param;
  1173  
  1174  } // mcl::bn::local
  1175  
  1176  namespace BN {
  1177  
  1178  static const local::Param& param = local::StaticVar<>::param;
  1179  
  1180  } // mcl::bn::BN
  1181  
  1182  namespace local {
  1183  
  1184  inline void mulArrayGLV1(G1& z, const G1& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
  1185  {
  1186  	mpz_class s;
  1187  	bool b;
  1188  	mcl::gmp::setArray(&b, s, y, yn);
  1189  	assert(b);
  1190  	if (isNegative) s = -s;
  1191  	BN::param.glv1.mul(z, x, s, constTime);
  1192  }
  1193  inline void mulArrayGLV2(G2& z, const G2& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
  1194  {
  1195  	mpz_class s;
  1196  	bool b;
  1197  	mcl::gmp::setArray(&b, s, y, yn);
  1198  	assert(b);
  1199  	if (isNegative) s = -s;
  1200  	BN::param.glv2.mul(z, x, s, constTime);
  1201  }
  1202  inline void powArrayGLV2(Fp12& z, const Fp12& x, const mcl::fp::Unit *y, size_t yn, bool isNegative, bool constTime)
  1203  {
  1204  	mpz_class s;
  1205  	bool b;
  1206  	mcl::gmp::setArray(&b, s, y, yn);
  1207  	assert(b);
  1208  	if (isNegative) s = -s;
  1209  	BN::param.glv2.pow(z, x, s, constTime);
  1210  }
  1211  
  1212  /*
  1213  	Faster Squaring in the Cyclotomic Subgroup of Sixth Degree Extensions
  1214  	Robert Granger, Michael Scott
  1215  */
  1216  inline void sqrFp4(Fp2& z0, Fp2& z1, const Fp2& x0, const Fp2& x1)
  1217  {
  1218  #if 1
  1219  	Fp2Dbl T0, T1, T2;
  1220  	Fp2Dbl::sqrPre(T0, x0);
  1221  	Fp2Dbl::sqrPre(T1, x1);
  1222  	Fp2Dbl::mul_xi(T2, T1);
  1223  	Fp2Dbl::add(T2, T2, T0);
  1224  	Fp2::add(z1, x0, x1);
  1225  	Fp2Dbl::mod(z0, T2);
  1226  	Fp2Dbl::sqrPre(T2, z1);
  1227  	Fp2Dbl::sub(T2, T2, T0);
  1228  	Fp2Dbl::sub(T2, T2, T1);
  1229  	Fp2Dbl::mod(z1, T2);
  1230  #else
  1231  	Fp2 t0, t1, t2;
  1232  	Fp2::sqr(t0, x0);
  1233  	Fp2::sqr(t1, x1);
  1234  	Fp2::mul_xi(z0, t1);
  1235  	z0 += t0;
  1236  	Fp2::add(z1, x0, x1);
  1237  	Fp2::sqr(z1, z1);
  1238  	z1 -= t0;
  1239  	z1 -= t1;
  1240  #endif
  1241  }
  1242  
  1243  inline void fasterSqr(Fp12& y, const Fp12& x)
  1244  {
  1245  #if 0
  1246  	Fp12::sqr(y, x);
  1247  #else
  1248  	const Fp2& x0(x.a.a);
  1249  	const Fp2& x4(x.a.b);
  1250  	const Fp2& x3(x.a.c);
  1251  	const Fp2& x2(x.b.a);
  1252  	const Fp2& x1(x.b.b);
  1253  	const Fp2& x5(x.b.c);
  1254  	Fp2& y0(y.a.a);
  1255  	Fp2& y4(y.a.b);
  1256  	Fp2& y3(y.a.c);
  1257  	Fp2& y2(y.b.a);
  1258  	Fp2& y1(y.b.b);
  1259  	Fp2& y5(y.b.c);
  1260  	Fp2 t0, t1;
  1261  	sqrFp4(t0, t1, x0, x1);
  1262  	Fp2::sub(y0, t0, x0);
  1263  	y0 += y0;
  1264  	y0 += t0;
  1265  	Fp2::add(y1, t1, x1);
  1266  	y1 += y1;
  1267  	y1 += t1;
  1268  	Fp2 t2, t3;
  1269  	sqrFp4(t0, t1, x2, x3);
  1270  	sqrFp4(t2, t3, x4, x5);
  1271  	Fp2::sub(y4, t0, x4);
  1272  	y4 += y4;
  1273  	y4 += t0;
  1274  	Fp2::add(y5, t1, x5);
  1275  	y5 += y5;
  1276  	y5 += t1;
  1277  	Fp2::mul_xi(t0, t3);
  1278  	Fp2::add(y2, t0, x2);
  1279  	y2 += y2;
  1280  	y2 += t0;
  1281  	Fp2::sub(y3, t2, x3);
  1282  	y3 += y3;
  1283  	y3 += t2;
  1284  #endif
  1285  }
  1286  
  1287  /*
  1288  	y = x^z if z > 0
  1289  	  = unitaryInv(x^(-z)) if z < 0
  1290  */
  1291  inline void pow_z(Fp12& y, const Fp12& x)
  1292  {
  1293  #if 1
  1294  	if (BN::param.cp.curveType == MCL_BN254) {
  1295  		Compress::fixed_power(y, x);
  1296  	} else {
  1297  		Fp12 orgX = x;
  1298  		y = x;
  1299  		Fp12 conj;
  1300  		conj.a = x.a;
  1301  		Fp6::neg(conj.b, x.b);
  1302  		for (size_t i = 1; i < BN::param.zReplTbl.size(); i++) {
  1303  			fasterSqr(y, y);
  1304  			if (BN::param.zReplTbl[i] > 0) {
  1305  				y *= orgX;
  1306  			} else if (BN::param.zReplTbl[i] < 0) {
  1307  				y *= conj;
  1308  			}
  1309  		}
  1310  	}
  1311  #else
  1312  	Fp12::pow(y, x, param.abs_z);
  1313  #endif
  1314  	if (BN::param.isNegative) {
  1315  		Fp12::unitaryInv(y, y);
  1316  	}
  1317  }
  1318  inline void mul_twist_b(Fp2& y, const Fp2& x)
  1319  {
  1320  	switch (BN::param.twist_b_type) {
  1321  	case local::tb_1m1i:
  1322  		/*
  1323  			b / xi = 1 - 1i
  1324  			(a + bi)(1 - 1i) = (a + b) + (b - a)i
  1325  		*/
  1326  		{
  1327  			Fp t;
  1328  			Fp::add(t, x.a, x.b);
  1329  			Fp::sub(y.b, x.b, x.a);
  1330  			y.a = t;
  1331  		}
  1332  		return;
  1333  	case local::tb_1m2i:
  1334  		/*
  1335  			b / xi = 1 - 2i
  1336  			(a + bi)(1 - 2i) = (a + 2b) + (b - 2a)i
  1337  		*/
  1338  		{
  1339  			Fp t;
  1340  			Fp::sub(t, x.b, x.a);
  1341  			t -= x.a;
  1342  			Fp::add(y.a, x.a, x.b);
  1343  			y.a += x.b;
  1344  			y.b = t;
  1345  		}
  1346  		return;
  1347  	case local::tb_generic:
  1348  		Fp2::mul(y, x, BN::param.twist_b);
  1349  		return;
  1350  	}
  1351  }
  1352  
  1353  inline void dblLineWithoutP(Fp6& l, G2& Q)
  1354  {
  1355  	Fp2 t0, t1, t2, t3, t4, t5;
  1356  	Fp2Dbl T0, T1;
  1357  	Fp2::sqr(t0, Q.z);
  1358  	Fp2::mul(t4, Q.x, Q.y);
  1359  	Fp2::sqr(t1, Q.y);
  1360  	Fp2::add(t3, t0, t0);
  1361  	Fp2::divBy2(t4, t4);
  1362  	Fp2::add(t5, t0, t1);
  1363  	t0 += t3;
  1364  	mul_twist_b(t2, t0);
  1365  	Fp2::sqr(t0, Q.x);
  1366  	Fp2::add(t3, t2, t2);
  1367  	t3 += t2;
  1368  	Fp2::sub(Q.x, t1, t3);
  1369  	t3 += t1;
  1370  	Q.x *= t4;
  1371  	Fp2::divBy2(t3, t3);
  1372  	Fp2Dbl::sqrPre(T0, t3);
  1373  	Fp2Dbl::sqrPre(T1, t2);
  1374  	Fp2Dbl::sub(T0, T0, T1);
  1375  	Fp2Dbl::add(T1, T1, T1);
  1376  	Fp2Dbl::sub(T0, T0, T1);
  1377  	Fp2::add(t3, Q.y, Q.z);
  1378  	Fp2Dbl::mod(Q.y, T0);
  1379  	Fp2::sqr(t3, t3);
  1380  	t3 -= t5;
  1381  	Fp2::mul(Q.z, t1, t3);
  1382  	Fp2::sub(l.a, t2, t1);
  1383  	l.c = t0;
  1384  	l.b = t3;
  1385  }
  1386  inline void addLineWithoutP(Fp6& l, G2& R, const G2& Q)
  1387  {
  1388  	Fp2 t1, t2, t3, t4;
  1389  	Fp2Dbl T1, T2;
  1390  	Fp2::mul(t1, R.z, Q.x);
  1391  	Fp2::mul(t2, R.z, Q.y);
  1392  	Fp2::sub(t1, R.x, t1);
  1393  	Fp2::sub(t2, R.y, t2);
  1394  	Fp2::sqr(t3, t1);
  1395  	Fp2::mul(R.x, t3, R.x);
  1396  	Fp2::sqr(t4, t2);
  1397  	t3 *= t1;
  1398  	t4 *= R.z;
  1399  	t4 += t3;
  1400  	t4 -= R.x;
  1401  	t4 -= R.x;
  1402  	R.x -= t4;
  1403  	Fp2Dbl::mulPre(T1, t2, R.x);
  1404  	Fp2Dbl::mulPre(T2, t3, R.y);
  1405  	Fp2Dbl::sub(T2, T1, T2);
  1406  	Fp2Dbl::mod(R.y, T2);
  1407  	Fp2::mul(R.x, t1, t4);
  1408  	Fp2::mul(R.z, t3, R.z);
  1409  	Fp2::neg(l.c, t2);
  1410  	Fp2Dbl::mulPre(T1, t2, Q.x);
  1411  	Fp2Dbl::mulPre(T2, t1, Q.y);
  1412  	Fp2Dbl::sub(T1, T1, T2);
  1413  	l.b = t1;
  1414  	Fp2Dbl::mod(l.a, T1);
  1415  }
  1416  inline void dblLine(Fp6& l, G2& Q, const G1& P)
  1417  {
  1418  	dblLineWithoutP(l, Q);
  1419  	local::updateLine(l, P);
  1420  }
  1421  inline void addLine(Fp6& l, G2& R, const G2& Q, const G1& P)
  1422  {
  1423  	addLineWithoutP(l, R, Q);
  1424  	local::updateLine(l, P);
  1425  }
  1426  inline void mulFp6cb_by_G1xy(Fp6& y, const Fp6& x, const G1& P)
  1427  {
  1428  	assert(P.isNormalized());
  1429  	if (&y != &x) y.a = x.a;
  1430  	Fp2::mulFp(y.c, x.c, P.x);
  1431  	Fp2::mulFp(y.b, x.b, P.y);
  1432  }
  1433  
  1434  /*
  1435  	x = a + bv + cv^2
  1436  	y = (y0, y4, y2) -> (y0, 0, y2, 0, y4, 0)
  1437  	z = xy = (a + bv + cv^2)(d + ev)
  1438  	= (ad + ce xi) + ((a + b)(d + e) - ad - be)v + (be + cd)v^2
  1439  */
  1440  inline void Fp6mul_01(Fp6& z, const Fp6& x, const Fp2& d, const Fp2& e)
  1441  {
  1442  	const Fp2& a = x.a;
  1443  	const Fp2& b = x.b;
  1444  	const Fp2& c = x.c;
  1445  	Fp2 t0, t1;
  1446  	Fp2Dbl AD, CE, BE, CD, T;
  1447  	Fp2Dbl::mulPre(AD, a, d);
  1448  	Fp2Dbl::mulPre(CE, c, e);
  1449  	Fp2Dbl::mulPre(BE, b, e);
  1450  	Fp2Dbl::mulPre(CD, c, d);
  1451  	Fp2::add(t0, a, b);
  1452  	Fp2::add(t1, d, e);
  1453  	Fp2Dbl::mulPre(T, t0, t1);
  1454  	T -= AD;
  1455  	T -= BE;
  1456  	Fp2Dbl::mod(z.b, T);
  1457  	Fp2Dbl::mul_xi(CE, CE);
  1458  	AD += CE;
  1459  	Fp2Dbl::mod(z.a, AD);
  1460  	BE += CD;
  1461  	Fp2Dbl::mod(z.c, BE);
  1462  }
  1463  /*
  1464  	input
  1465  	z = (z0 + z1v + z2v^2) + (z3 + z4v + z5v^2)w = Z0 + Z1w
  1466  	                  0        3  4
  1467  	x = (a, b, c) -> (b, 0, 0, c, a, 0) = X0 + X1w
  1468  	X0 = b = (b, 0, 0)
  1469  	X1 = c + av = (c, a, 0)
  1470  	w^2 = v, v^3 = xi
  1471  	output
  1472  	z <- zx = (Z0X0 + Z1X1v) + ((Z0 + Z1)(X0 + X1) - Z0X0 - Z1X1)w
  1473  	Z0X0 = Z0 b
  1474  	Z1X1 = Z1 (c, a, 0)
  1475  	(Z0 + Z1)(X0 + X1) = (Z0 + Z1) (b + c, a, 0)
  1476  */
  1477  inline void mul_403(Fp12& z, const Fp6& x)
  1478  {
  1479  	const Fp2& a = x.a;
  1480  	const Fp2& b = x.b;
  1481  	const Fp2& c = x.c;
  1482  #if 1
  1483  	Fp6& z0 = z.a;
  1484  	Fp6& z1 = z.b;
  1485  	Fp6 z0x0, z1x1, t0;
  1486  	Fp2 t1;
  1487  	Fp2::add(t1, x.b, c);
  1488  	Fp6::add(t0, z0, z1);
  1489  	Fp2::mul(z0x0.a, z0.a, b);
  1490  	Fp2::mul(z0x0.b, z0.b, b);
  1491  	Fp2::mul(z0x0.c, z0.c, b);
  1492  	Fp6mul_01(z1x1, z1, c, a);
  1493  	Fp6mul_01(t0, t0, t1, a);
  1494  	Fp6::sub(z.b, t0, z0x0);
  1495  	z.b -= z1x1;
  1496  	// a + bv + cv^2 = cxi + av + bv^2
  1497  	Fp2::mul_xi(z1x1.c, z1x1.c);
  1498  	Fp2::add(z.a.a, z0x0.a, z1x1.c);
  1499  	Fp2::add(z.a.b, z0x0.b, z1x1.a);
  1500  	Fp2::add(z.a.c, z0x0.c, z1x1.b);
  1501  #else
  1502  	Fp2& z0 = z.a.a;
  1503  	Fp2& z1 = z.a.b;
  1504  	Fp2& z2 = z.a.c;
  1505  	Fp2& z3 = z.b.a;
  1506  	Fp2& z4 = z.b.b;
  1507  	Fp2& z5 = z.b.c;
  1508  	Fp2Dbl Z0B, Z1B, Z2B, Z3C, Z4C, Z5C;
  1509  	Fp2Dbl T0, T1, T2, T3, T4, T5;
  1510  	Fp2 bc, t;
  1511  	Fp2::addPre(bc, b, c);
  1512  	Fp2::addPre(t, z5, z2);
  1513  	Fp2Dbl::mulPre(T5, t, bc);
  1514  	Fp2Dbl::mulPre(Z5C, z5, c);
  1515  	Fp2Dbl::mulPre(Z2B, z2, b);
  1516  	Fp2Dbl::sub(T5, T5, Z5C);
  1517  	Fp2Dbl::sub(T5, T5, Z2B);
  1518  	Fp2Dbl::mulPre(T0, z1, a);
  1519  	T5 += T0;
  1520  
  1521  	Fp2::addPre(t, z4, z1);
  1522  	Fp2Dbl::mulPre(T4, t, bc);
  1523  	Fp2Dbl::mulPre(Z4C, z4, c);
  1524  	Fp2Dbl::mulPre(Z1B, z1, b);
  1525  	Fp2Dbl::sub(T4, T4, Z4C);
  1526  	Fp2Dbl::sub(T4, T4, Z1B);
  1527  	Fp2Dbl::mulPre(T0, z0, a);
  1528  	T4 += T0;
  1529  
  1530  	Fp2::addPre(t, z3, z0);
  1531  	Fp2Dbl::mulPre(T3, t, bc);
  1532  	Fp2Dbl::mulPre(Z3C, z3, c);
  1533  	Fp2Dbl::mulPre(Z0B, z0, b);
  1534  	Fp2Dbl::sub(T3, T3, Z3C);
  1535  	Fp2Dbl::sub(T3, T3, Z0B);
  1536  	Fp2::mul_xi(t, z2);
  1537  	Fp2Dbl::mulPre(T0, t, a);
  1538  	T3 += T0;
  1539  
  1540  	Fp2Dbl::mulPre(T2, z3, a);
  1541  	T2 += Z2B;
  1542  	T2 += Z4C;
  1543  
  1544  	Fp2::mul_xi(t, z5);
  1545  	Fp2Dbl::mulPre(T1, t, a);
  1546  	T1 += Z1B;
  1547  	T1 += Z3C;
  1548  
  1549  	Fp2Dbl::mulPre(T0, z4, a);
  1550  	T0 += Z5C;
  1551  	Fp2Dbl::mul_xi(T0, T0);
  1552  	T0 += Z0B;
  1553  
  1554  	Fp2Dbl::mod(z0, T0);
  1555  	Fp2Dbl::mod(z1, T1);
  1556  	Fp2Dbl::mod(z2, T2);
  1557  	Fp2Dbl::mod(z3, T3);
  1558  	Fp2Dbl::mod(z4, T4);
  1559  	Fp2Dbl::mod(z5, T5);
  1560  #endif
  1561  }
  1562  /*
  1563  	input
  1564  	z = (z0 + z1v + z2v^2) + (z3 + z4v + z5v^2)w = Z0 + Z1w
  1565  	                  0  1        4
  1566  	x = (a, b, c) -> (a, c, 0, 0, b, 0) = X0 + X1w
  1567  	X0 = (a, c, 0)
  1568  	X1 = (0, b, 0)
  1569  	w^2 = v, v^3 = xi
  1570  	output
  1571  	z <- zx = (Z0X0 + Z1X1v) + ((Z0 + Z1)(X0 + X1) - Z0X0 - Z1X1)w
  1572  	Z0X0 = Z0 (a, c, 0)
  1573  	Z1X1 = Z1 (0, b, 0) = Z1 bv
  1574  	(Z0 + Z1)(X0 + X1) = (Z0 + Z1) (a, b + c, 0)
  1575  
  1576  	(a + bv + cv^2)v = c xi + av + bv^2
  1577  */
  1578  inline void mul_041(Fp12& z, const Fp6& x)
  1579  {
  1580  	const Fp2& a = x.a;
  1581  	const Fp2& b = x.b;
  1582  	const Fp2& c = x.c;
  1583  	Fp6& z0 = z.a;
  1584  	Fp6& z1 = z.b;
  1585  	Fp6 z0x0, z1x1, t0;
  1586  	Fp2 t1;
  1587  	Fp2::mul(z1x1.a, z1.c, b);
  1588  	Fp2::mul_xi(z1x1.a, z1x1.a);
  1589  	Fp2::mul(z1x1.b, z1.a, b);
  1590  	Fp2::mul(z1x1.c, z1.b, b);
  1591  	Fp2::add(t1, x.b, c);
  1592  	Fp6::add(t0, z0, z1);
  1593  	Fp6mul_01(z0x0, z0, a, c);
  1594  	Fp6mul_01(t0, t0, a, t1);
  1595  	Fp6::sub(z.b, t0, z0x0);
  1596  	z.b -= z1x1;
  1597  	// a + bv + cv^2 = cxi + av + bv^2
  1598  	Fp2::mul_xi(z1x1.c, z1x1.c);
  1599  	Fp2::add(z.a.a, z0x0.a, z1x1.c);
  1600  	Fp2::add(z.a.b, z0x0.b, z1x1.a);
  1601  	Fp2::add(z.a.c, z0x0.c, z1x1.b);
  1602  }
  1603  inline void mulSparse(Fp12& z, const Fp6& x)
  1604  {
  1605  	if (BN::param.cp.isMtype) {
  1606  		mul_041(z, x);
  1607  	} else {
  1608  		mul_403(z, x);
  1609  	}
  1610  }
  1611  inline void convertFp6toFp12(Fp12& y, const Fp6& x)
  1612  {
  1613  	if (BN::param.cp.isMtype) {
  1614  		// (a, b, c) -> (a, c, 0, 0, b, 0)
  1615  		y.a.a = x.a;
  1616  		y.b.b = x.b;
  1617  		y.a.b = x.c;
  1618  		y.a.c.clear();
  1619  		y.b.a.clear();
  1620  		y.b.c.clear();
  1621  	} else {
  1622  		// (a, b, c) -> (b, 0, 0, c, a, 0)
  1623  		y.b.b = x.a;
  1624  		y.a.a = x.b;
  1625  		y.b.a = x.c;
  1626  		y.a.b.clear();
  1627  		y.a.c.clear();
  1628  		y.b.c.clear();
  1629  	}
  1630  }
  1631  inline void mulSparse2(Fp12& z, const Fp6& x, const Fp6& y)
  1632  {
  1633  	convertFp6toFp12(z, x);
  1634  	mulSparse(z, y);
  1635  }
  1636  inline void mapToCyclotomic(Fp12& y, const Fp12& x)
  1637  {
  1638  	Fp12 z;
  1639  	Fp12::Frobenius2(z, x); // z = x^(p^2)
  1640  	z *= x; // x^(p^2 + 1)
  1641  	Fp12::inv(y, z);
  1642  	Fp6::neg(z.b, z.b); // z^(p^6) = conjugate of z
  1643  	y *= z;
  1644  }
  1645  /*
  1646  	Implementing Pairings at the 192-bit Security Level
  1647  	D.F.Aranha, L.F.Castaneda, E.Knapp, A.Menezes, F.R.Henriquez
  1648  	Section 4
  1649  */
  1650  inline void expHardPartBLS12(Fp12& y, const Fp12& x)
  1651  {
  1652  #if 0
  1653  	const mpz_class& p = param.p;
  1654  	mpz_class p2 = p * p;
  1655  	mpz_class p4 = p2 * p2;
  1656  	Fp12::pow(y, x, (p4 - p2 + 1) / param.r * 3);
  1657  	return;
  1658  #endif
  1659  #if 1
  1660  	Fp12 a0, a1, a2, a3, a4, a5, a6, a7;
  1661  	Fp12::unitaryInv(a0, x); // a0 = x^-1
  1662  	fasterSqr(a1, a0); // x^-2
  1663  	pow_z(a2, x); // x^z
  1664  	fasterSqr(a3, a2); // x^2z
  1665  	a1 *= a2; // a1 = x^(z-2)
  1666  	pow_z(a7, a1); // a7 = x^(z^2-2z)
  1667  	pow_z(a4, a7); // a4 = x^(z^3-2z^2)
  1668  	pow_z(a5, a4); // a5 = x^(z^4-2z^3)
  1669  	a3 *= a5; // a3 = x^(z^4-2z^3+2z)
  1670  	pow_z(a6, a3); // a6 = x^(z^5-2z^4+2z^2)
  1671  
  1672  	Fp12::unitaryInv(a1, a1); // x^(2-z)
  1673  	a1 *= a6; // x^(z^5-2z^4+2z^2-z+2)
  1674  	a1 *= x; // x^(z^5-2z^4+2z^2-z+3) = x^c0
  1675  	a3 *= a0; // x^(z^4-2z^3-1) = x^c1
  1676  	Fp12::Frobenius(a3, a3); // x^(c1 p)
  1677  	a1 *= a3; // x^(c0 + c1 p)
  1678  	a4 *= a2; // x^(z^3-2z^2+z) = x^c2
  1679  	Fp12::Frobenius2(a4, a4);  // x^(c2 p^2)
  1680  	a1 *= a4; // x^(c0 + c1 p + c2 p^2)
  1681  	a7 *= x; // x^(z^2-2z+1) = x^c3
  1682  	Fp12::Frobenius3(y, a7);
  1683  	y *= a1;
  1684  #else
  1685  	Fp12 t1, t2, t3;
  1686  	Fp12::Frobenius(t1, x);
  1687  	Fp12::Frobenius(t2, t1);
  1688  	Fp12::Frobenius(t3, t2);
  1689  	Fp12::pow(t1, t1, param.exp_c1);
  1690  	Fp12::pow(t2, t2, param.exp_c2);
  1691  	Fp12::pow(t3, t3, param.exp_c3);
  1692  	Fp12::pow(y, x, param.exp_c0);
  1693  	y *= t1;
  1694  	y *= t2;
  1695  	y *= t3;
  1696  #endif
  1697  }
  1698  /*
  1699  	Faster Hashing to G2
  1700  	Laura Fuentes-Castaneda, Edward Knapp, Francisco Rodriguez-Henriquez
  1701  	section 4.1
  1702  	y = x^(d 2z(6z^2 + 3z + 1)) where
  1703  	p = p(z) = 36z^4 + 36z^3 + 24z^2 + 6z + 1
  1704  	r = r(z) = 36z^4 + 36z^3 + 18z^2 + 6z + 1
  1705  	d = (p^4 - p^2 + 1) / r
  1706  	d1 = d 2z(6z^2 + 3z + 1)
  1707  	= c0 + c1 p + c2 p^2 + c3 p^3
  1708  
  1709  	c0 = 1 + 6z + 12z^2 + 12z^3
  1710  	c1 = 4z + 6z^2 + 12z^3
  1711  	c2 = 6z + 6z^2 + 12z^3
  1712  	c3 = -1 + 4z + 6z^2 + 12z^3
  1713  	x -> x^z -> x^2z -> x^4z -> x^6z -> x^(6z^2) -> x^(12z^2) -> x^(12z^3)
  1714  	a = x^(6z) x^(6z^2) x^(12z^3)
  1715  	b = a / (x^2z)
  1716  	x^d1 = (a x^(6z^2) x) b^p a^(p^2) (b / x)^(p^3)
  1717  */
  1718  inline void expHardPartBN(Fp12& y, const Fp12& x)
  1719  {
  1720  #if 0
  1721  	const mpz_class& p = param.p;
  1722  	mpz_class p2 = p * p;
  1723  	mpz_class p4 = p2 * p2;
  1724  	Fp12::pow(y, x, (p4 - p2 + 1) / param.r);
  1725  	return;
  1726  #endif
  1727  #if 1
  1728  	Fp12 a, b;
  1729  	Fp12 a2, a3;
  1730  	pow_z(b, x); // x^z
  1731  	fasterSqr(b, b); // x^2z
  1732  	fasterSqr(a, b); // x^4z
  1733  	a *= b; // x^6z
  1734  	pow_z(a2, a); // x^(6z^2)
  1735  	a *= a2;
  1736  	fasterSqr(a3, a2); // x^(12z^2)
  1737  	pow_z(a3, a3); // x^(12z^3)
  1738  	a *= a3;
  1739  	Fp12::unitaryInv(b, b);
  1740  	b *= a;
  1741  	a2 *= a;
  1742  	Fp12::Frobenius2(a, a);
  1743  	a *= a2;
  1744  	a *= x;
  1745  	Fp12::unitaryInv(y, x);
  1746  	y *= b;
  1747  	Fp12::Frobenius(b, b);
  1748  	a *= b;
  1749  	Fp12::Frobenius3(y, y);
  1750  	y *= a;
  1751  #else
  1752  	Fp12 t1, t2, t3;
  1753  	Fp12::Frobenius(t1, x);
  1754  	Fp12::Frobenius(t2, t1);
  1755  	Fp12::Frobenius(t3, t2);
  1756  	Fp12::pow(t1, t1, param.exp_c1);
  1757  	Fp12::pow(t2, t2, param.exp_c2);
  1758  	Fp12::pow(y, x, param.exp_c0);
  1759  	y *= t1;
  1760  	y *= t2;
  1761  	y *= t3;
  1762  #endif
  1763  }
  1764  /*
  1765  	remark : returned value is NOT on a curve
  1766  */
  1767  inline G1 makeAdjP(const G1& P)
  1768  {
  1769  	G1 adjP;
  1770  	Fp::add(adjP.x, P.x, P.x);
  1771  	adjP.x += P.x;
  1772  	Fp::neg(adjP.y, P.y);
  1773  	adjP.z = 1;
  1774  	return adjP;
  1775  }
  1776  
  1777  } // mcl::bn::local
  1778  
  1779  /*
  1780  	y = x^((p^12 - 1) / r)
  1781  	(p^12 - 1) / r = (p^2 + 1) (p^6 - 1) (p^4 - p^2 + 1)/r
  1782  	(a + bw)^(p^6) = a - bw in Fp12
  1783  	(p^4 - p^2 + 1)/r = c0 + c1 p + c2 p^2 + p^3
  1784  */
  1785  inline void finalExp(Fp12& y, const Fp12& x)
  1786  {
  1787  #if 1
  1788  	mapToCyclotomic(y, x);
  1789  #else
  1790  	const mpz_class& p = param.p;
  1791  	mpz_class p2 = p * p;
  1792  	mpz_class p4 = p2 * p2;
  1793  	Fp12::pow(y, x, p2 + 1);
  1794  	Fp12::pow(y, y, p4 * p2 - 1);
  1795  #endif
  1796  	if (BN::param.isBLS12) {
  1797  		expHardPartBLS12(y, y);
  1798  	} else {
  1799  		expHardPartBN(y, y);
  1800  	}
  1801  }
  1802  inline void millerLoop(Fp12& f, const G1& P_, const G2& Q_)
  1803  {
  1804  	G1 P(P_);
  1805  	G2 Q(Q_);
  1806  	P.normalize();
  1807  	Q.normalize();
  1808  	if (Q.isZero()) {
  1809  		f = 1;
  1810  		return;
  1811  	}
  1812  	assert(BN::param.siTbl[1] == 1);
  1813  	G2 T = Q;
  1814  	G2 negQ;
  1815  	if (BN::param.useNAF) {
  1816  		G2::neg(negQ, Q);
  1817  	}
  1818  	Fp6 d, e, l;
  1819  	d = e = l = 1;
  1820  	G1 adjP = makeAdjP(P);
  1821  	dblLine(d, T, adjP);
  1822  	addLine(l, T, Q, P);
  1823  	mulSparse2(f, d, l);
  1824  	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
  1825  		dblLine(l, T, adjP);
  1826  		Fp12::sqr(f, f);
  1827  		mulSparse(f, l);
  1828  		if (BN::param.siTbl[i]) {
  1829  			if (BN::param.siTbl[i] > 0) {
  1830  				addLine(l, T, Q, P);
  1831  			} else {
  1832  				addLine(l, T, negQ, P);
  1833  			}
  1834  			mulSparse(f, l);
  1835  		}
  1836  	}
  1837  	if (BN::param.z < 0) {
  1838  		G2::neg(T, T);
  1839  		Fp6::neg(f.b, f.b);
  1840  	}
  1841  	if (BN::param.isBLS12) return;
  1842  	G2 Q1, Q2;
  1843  	Frobenius(Q1, Q);
  1844  	Frobenius(Q2, Q1);
  1845  	G2::neg(Q2, Q2);
  1846  	addLine(d, T, Q1, P);
  1847  	addLine(e, T, Q2, P);
  1848  	Fp12 ft;
  1849  	mulSparse2(ft, d, e);
  1850  	f *= ft;
  1851  }
  1852  inline void pairing(Fp12& f, const G1& P, const G2& Q)
  1853  {
  1854  	millerLoop(f, P, Q);
  1855  	finalExp(f, f);
  1856  }
  1857  /*
  1858  	allocate param.precomputedQcoeffSize elements of Fp6 for Qcoeff
  1859  */
  1860  inline void precomputeG2(Fp6 *Qcoeff, const G2& Q_)
  1861  {
  1862  	size_t idx = 0;
  1863  	G2 Q(Q_);
  1864  	Q.normalize();
  1865  	if (Q.isZero()) {
  1866  		for (size_t i = 0; i < BN::param.precomputedQcoeffSize; i++) {
  1867  			Qcoeff[i] = 1;
  1868  		}
  1869  		return;
  1870  	}
  1871  	G2 T = Q;
  1872  	G2 negQ;
  1873  	if (BN::param.useNAF) {
  1874  		G2::neg(negQ, Q);
  1875  	}
  1876  	assert(BN::param.siTbl[1] == 1);
  1877  	dblLineWithoutP(Qcoeff[idx++], T);
  1878  	addLineWithoutP(Qcoeff[idx++], T, Q);
  1879  	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
  1880  		dblLineWithoutP(Qcoeff[idx++], T);
  1881  		if (BN::param.siTbl[i]) {
  1882  			if (BN::param.siTbl[i] > 0) {
  1883  				addLineWithoutP(Qcoeff[idx++], T, Q);
  1884  			} else {
  1885  				addLineWithoutP(Qcoeff[idx++], T, negQ);
  1886  			}
  1887  		}
  1888  	}
  1889  	if (BN::param.z < 0) {
  1890  		G2::neg(T, T);
  1891  	}
  1892  	if (BN::param.isBLS12) return;
  1893  	G2 Q1, Q2;
  1894  	Frobenius(Q1, Q);
  1895  	Frobenius(Q2, Q1);
  1896  	G2::neg(Q2, Q2);
  1897  	addLineWithoutP(Qcoeff[idx++], T, Q1);
  1898  	addLineWithoutP(Qcoeff[idx++], T, Q2);
  1899  	assert(idx == BN::param.precomputedQcoeffSize);
  1900  }
  1901  /*
  1902  	millerLoop(e, P, Q) is same as the following
  1903  	std::vector<Fp6> Qcoeff;
  1904  	precomputeG2(Qcoeff, Q);
  1905  	precomputedMillerLoop(e, P, Qcoeff);
  1906  */
  1907  #ifndef CYBOZU_DONT_USE_EXCEPTION
  1908  inline void precomputeG2(std::vector<Fp6>& Qcoeff, const G2& Q)
  1909  {
  1910  	Qcoeff.resize(BN::param.precomputedQcoeffSize);
  1911  	precomputeG2(Qcoeff.data(), Q);
  1912  }
  1913  #endif
  1914  template<class Array>
  1915  void precomputeG2(bool *pb, Array& Qcoeff, const G2& Q)
  1916  {
  1917  	*pb = Qcoeff.resize(BN::param.precomputedQcoeffSize);
  1918  	if (!*pb) return;
  1919  	precomputeG2(Qcoeff.data(), Q);
  1920  }
  1921  
  1922  inline void precomputedMillerLoop(Fp12& f, const G1& P_, const Fp6* Qcoeff)
  1923  {
  1924  	G1 P(P_);
  1925  	P.normalize();
  1926  	G1 adjP = makeAdjP(P);
  1927  	size_t idx = 0;
  1928  	Fp6 d, e, l;
  1929  	mulFp6cb_by_G1xy(d, Qcoeff[idx], adjP);
  1930  	idx++;
  1931  
  1932  	mulFp6cb_by_G1xy(e, Qcoeff[idx], P);
  1933  	idx++;
  1934  	mulSparse2(f, d, e);
  1935  	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
  1936  		mulFp6cb_by_G1xy(l, Qcoeff[idx], adjP);
  1937  		idx++;
  1938  		Fp12::sqr(f, f);
  1939  		mulSparse(f, l);
  1940  		if (BN::param.siTbl[i]) {
  1941  			mulFp6cb_by_G1xy(l, Qcoeff[idx], P);
  1942  			idx++;
  1943  			mulSparse(f, l);
  1944  		}
  1945  	}
  1946  	if (BN::param.z < 0) {
  1947  		Fp6::neg(f.b, f.b);
  1948  	}
  1949  	if (BN::param.isBLS12) return;
  1950  	mulFp6cb_by_G1xy(d, Qcoeff[idx], P);
  1951  	idx++;
  1952  	mulFp6cb_by_G1xy(e, Qcoeff[idx], P);
  1953  	idx++;
  1954  	Fp12 ft;
  1955  	mulSparse2(ft, d, e);
  1956  	f *= ft;
  1957  }
  1958  #ifndef CYBOZU_DONT_USE_EXCEPTION
  1959  inline void precomputedMillerLoop(Fp12& f, const G1& P, const std::vector<Fp6>& Qcoeff)
  1960  {
  1961  	precomputedMillerLoop(f, P, Qcoeff.data());
  1962  }
  1963  #endif
  1964  /*
  1965  	f = MillerLoop(P1, Q1) x MillerLoop(P2, Q2)
  1966  	Q2coeff : precomputed Q2
  1967  */
  1968  inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1_, const G2& Q1_, const G1& P2_, const Fp6* Q2coeff)
  1969  {
  1970  	G1 P1(P1_), P2(P2_);
  1971  	G2 Q1(Q1_);
  1972  	P1.normalize();
  1973  	P2.normalize();
  1974  	Q1.normalize();
  1975  	if (Q1.isZero()) {
  1976  		precomputedMillerLoop(f, P2_, Q2coeff);
  1977  		return;
  1978  	}
  1979  	G2 T = Q1;
  1980  	G2 negQ1;
  1981  	if (BN::param.useNAF) {
  1982  		G2::neg(negQ1, Q1);
  1983  	}
  1984  	G1 adjP1 = makeAdjP(P1);
  1985  	G1 adjP2 = makeAdjP(P2);
  1986  	size_t idx = 0;
  1987  	Fp6 d1, d2, e1, e2, l1, l2;
  1988  	dblLine(d1, T, adjP1);
  1989  	mulFp6cb_by_G1xy(d2, Q2coeff[idx], adjP2);
  1990  	idx++;
  1991  
  1992  	Fp12 f1, f2;
  1993  	e1 = 1;
  1994  	addLine(e1, T, Q1, P1);
  1995  	mulSparse2(f1, d1, e1);
  1996  
  1997  	mulFp6cb_by_G1xy(e2, Q2coeff[idx], P2);
  1998  	mulSparse2(f2, d2, e2);
  1999  	Fp12::mul(f, f1, f2);
  2000  	idx++;
  2001  	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
  2002  		dblLine(l1, T, adjP1);
  2003  		mulFp6cb_by_G1xy(l2, Q2coeff[idx], adjP2);
  2004  		idx++;
  2005  		Fp12::sqr(f, f);
  2006  		mulSparse2(f1, l1, l2);
  2007  		f *= f1;
  2008  		if (BN::param.siTbl[i]) {
  2009  			if (BN::param.siTbl[i] > 0) {
  2010  				addLine(l1, T, Q1, P1);
  2011  			} else {
  2012  				addLine(l1, T, negQ1, P1);
  2013  			}
  2014  			mulFp6cb_by_G1xy(l2, Q2coeff[idx], P2);
  2015  			idx++;
  2016  			mulSparse2(f1, l1, l2);
  2017  			f *= f1;
  2018  		}
  2019  	}
  2020  	if (BN::param.z < 0) {
  2021  		G2::neg(T, T);
  2022  		Fp6::neg(f.b, f.b);
  2023  	}
  2024  	if (BN::param.isBLS12) return;
  2025  	G2 Q11, Q12;
  2026  	Frobenius(Q11, Q1);
  2027  	Frobenius(Q12, Q11);
  2028  	G2::neg(Q12, Q12);
  2029  	addLine(d1, T, Q11, P1);
  2030  	mulFp6cb_by_G1xy(d2, Q2coeff[idx], P2);
  2031  	idx++;
  2032  	addLine(e1, T, Q12, P1);
  2033  	mulFp6cb_by_G1xy(e2, Q2coeff[idx], P2);
  2034  	idx++;
  2035  	mulSparse2(f1, d1, e1);
  2036  	mulSparse2(f2, d2, e2);
  2037  	f *= f1;
  2038  	f *= f2;
  2039  }
  2040  /*
  2041  	f = MillerLoop(P1, Q1) x MillerLoop(P2, Q2)
  2042  	Q1coeff, Q2coeff : precomputed Q1, Q2
  2043  */
  2044  inline void precomputedMillerLoop2(Fp12& f, const G1& P1_, const Fp6* Q1coeff, const G1& P2_, const Fp6* Q2coeff)
  2045  {
  2046  	G1 P1(P1_), P2(P2_);
  2047  	P1.normalize();
  2048  	P2.normalize();
  2049  	G1 adjP1 = makeAdjP(P1);
  2050  	G1 adjP2 = makeAdjP(P2);
  2051  	size_t idx = 0;
  2052  	Fp6 d1, d2, e1, e2, l1, l2;
  2053  	mulFp6cb_by_G1xy(d1, Q1coeff[idx], adjP1);
  2054  	mulFp6cb_by_G1xy(d2, Q2coeff[idx], adjP2);
  2055  	idx++;
  2056  
  2057  	Fp12 f1, f2;
  2058  	mulFp6cb_by_G1xy(e1, Q1coeff[idx], P1);
  2059  	mulSparse2(f1, d1, e1);
  2060  
  2061  	mulFp6cb_by_G1xy(e2, Q2coeff[idx], P2);
  2062  	mulSparse2(f2, d2, e2);
  2063  	Fp12::mul(f, f1, f2);
  2064  	idx++;
  2065  	for (size_t i = 2; i < BN::param.siTbl.size(); i++) {
  2066  		mulFp6cb_by_G1xy(l1, Q1coeff[idx], adjP1);
  2067  		mulFp6cb_by_G1xy(l2, Q2coeff[idx], adjP2);
  2068  		idx++;
  2069  		Fp12::sqr(f, f);
  2070  		mulSparse2(f1, l1, l2);
  2071  		f *= f1;
  2072  		if (BN::param.siTbl[i]) {
  2073  			mulFp6cb_by_G1xy(l1, Q1coeff[idx], P1);
  2074  			mulFp6cb_by_G1xy(l2, Q2coeff[idx], P2);
  2075  			idx++;
  2076  			mulSparse2(f1, l1, l2);
  2077  			f *= f1;
  2078  		}
  2079  	}
  2080  	if (BN::param.z < 0) {
  2081  		Fp6::neg(f.b, f.b);
  2082  	}
  2083  	if (BN::param.isBLS12) return;
  2084  	mulFp6cb_by_G1xy(d1, Q1coeff[idx], P1);
  2085  	mulFp6cb_by_G1xy(d2, Q2coeff[idx], P2);
  2086  	idx++;
  2087  	mulFp6cb_by_G1xy(e1, Q1coeff[idx], P1);
  2088  	mulFp6cb_by_G1xy(e2, Q2coeff[idx], P2);
  2089  	idx++;
  2090  	mulSparse2(f1, d1, e1);
  2091  	mulSparse2(f2, d2, e2);
  2092  	f *= f1;
  2093  	f *= f2;
  2094  }
  2095  #ifndef CYBOZU_DONT_USE_EXCEPTION
  2096  inline void precomputedMillerLoop2(Fp12& f, const G1& P1, const std::vector<Fp6>& Q1coeff, const G1& P2, const std::vector<Fp6>& Q2coeff)
  2097  {
  2098  	precomputedMillerLoop2(f, P1, Q1coeff.data(), P2, Q2coeff.data());
  2099  }
  2100  inline void precomputedMillerLoop2mixed(Fp12& f, const G1& P1, const G2& Q1, const G1& P2, const std::vector<Fp6>& Q2coeff)
  2101  {
  2102  	precomputedMillerLoop2mixed(f, P1, Q1, P2, Q2coeff.data());
  2103  }
  2104  #endif
  2105  inline void mapToG1(bool *pb, G1& P, const Fp& x) { *pb = BN::param.mapTo.calcG1(P, x); }
  2106  inline void mapToG2(bool *pb, G2& P, const Fp2& x) { *pb = BN::param.mapTo.calcG2(P, x); }
  2107  #ifndef CYBOZU_DONT_USE_EXCEPTION
  2108  inline void mapToG1(G1& P, const Fp& x)
  2109  {
  2110  	bool b;
  2111  	mapToG1(&b, P, x);
  2112  	if (!b) throw cybozu::Exception("mapToG1:bad value") << x;
  2113  }
  2114  inline void mapToG2(G2& P, const Fp2& x)
  2115  {
  2116  	bool b;
  2117  	mapToG2(&b, P, x);
  2118  	if (!b) throw cybozu::Exception("mapToG2:bad value") << x;
  2119  }
  2120  #endif
  2121  inline void hashAndMapToG1(G1& P, const void *buf, size_t bufSize)
  2122  {
  2123  	Fp t;
  2124  	t.setHashOf(buf, bufSize);
  2125  	bool b;
  2126  	mapToG1(&b, P, t);
  2127  	// It will not happen that the hashed value is equal to special value
  2128  	assert(b);
  2129  	(void)b;
  2130  }
  2131  inline void hashAndMapToG2(G2& P, const void *buf, size_t bufSize)
  2132  {
  2133  	Fp2 t;
  2134  	t.a.setHashOf(buf, bufSize);
  2135  	t.b.clear();
  2136  	bool b;
  2137  	mapToG2(&b, P, t);
  2138  	// It will not happen that the hashed value is equal to special value
  2139  	assert(b);
  2140  	(void)b;
  2141  }
  2142  #ifndef CYBOZU_DONT_USE_STRING
  2143  inline void hashAndMapToG1(G1& P, const std::string& str)
  2144  {
  2145  	hashAndMapToG1(P, str.c_str(), str.size());
  2146  }
  2147  inline void hashAndMapToG2(G2& P, const std::string& str)
  2148  {
  2149  	hashAndMapToG2(P, str.c_str(), str.size());
  2150  }
  2151  #endif
  2152  inline void verifyOrderG1(bool doVerify)
  2153  {
  2154  	if (BN::param.isBLS12) {
  2155  		G1::setOrder(doVerify ? BN::param.r : 0);
  2156  	}
  2157  }
  2158  inline void verifyOrderG2(bool doVerify)
  2159  {
  2160  	G2::setOrder(doVerify ? BN::param.r : 0);
  2161  }
  2162  
  2163  // backward compatibility
  2164  using mcl::CurveParam;
  2165  static const CurveParam& CurveFp254BNb = BN254;
  2166  static const CurveParam& CurveFp382_1 = BN381_1;
  2167  static const CurveParam& CurveFp382_2 = BN381_2;
  2168  static const CurveParam& CurveFp462 = BN462;
  2169  static const CurveParam& CurveSNARK1 = BN_SNARK1;
  2170  
  2171  /*
  2172  	FrobeniusOnTwist for Dtype
  2173  	p mod 6 = 1, w^6 = xi
  2174  	Frob(x', y') = phi Frob phi^-1(x', y')
  2175  	= phi Frob (x' w^2, y' w^3)
  2176  	= phi (x'^p w^2p, y'^p w^3p)
  2177  	= (F(x') w^2(p - 1), F(y') w^3(p - 1))
  2178  	= (F(x') g^2, F(y') g^3)
  2179  
  2180  	FrobeniusOnTwist for Dtype
  2181  	use (1/g) instead of g
  2182  */
  2183  inline void Frobenius(G2& D, const G2& S)
  2184  {
  2185  	Fp2::Frobenius(D.x, S.x);
  2186  	Fp2::Frobenius(D.y, S.y);
  2187  	Fp2::Frobenius(D.z, S.z);
  2188  	D.x *= BN::param.g2;
  2189  	D.y *= BN::param.g3;
  2190  }
  2191  inline void Frobenius2(G2& D, const G2& S)
  2192  {
  2193  	Frobenius(D, S);
  2194  	Frobenius(D, D);
  2195  }
  2196  inline void Frobenius3(G2& D, const G2& S)
  2197  {
  2198  	Frobenius(D, S);
  2199  	Frobenius(D, D);
  2200  	Frobenius(D, D);
  2201  }
  2202  
  2203  namespace BN {
  2204  
  2205  using namespace mcl::bn; // backward compatibility
  2206  
  2207  inline void init(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode = fp::FP_AUTO)
  2208  {
  2209  	local::StaticVar<>::param.init(pb, cp, mode);
  2210  	if (!*pb) return;
  2211  	G1::setMulArrayGLV(local::mulArrayGLV1);
  2212  	G2::setMulArrayGLV(local::mulArrayGLV2);
  2213  	Fp12::setPowArrayGLV(local::powArrayGLV2);
  2214  	G1::setCompressedExpression();
  2215  	G2::setCompressedExpression();
  2216  	*pb = true;
  2217  }
  2218  
  2219  #ifndef CYBOZU_DONT_USE_EXCEPTION
  2220  inline void init(const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode = fp::FP_AUTO)
  2221  {
  2222  	bool b;
  2223  	init(&b, cp, mode);
  2224  	if (!b) throw cybozu::Exception("BN:init");
  2225  }
  2226  #endif
  2227  
  2228  } // mcl::bn::BN
  2229  
  2230  inline void initPairing(bool *pb, const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode = fp::FP_AUTO)
  2231  {
  2232  	BN::init(pb, cp, mode);
  2233  }
  2234  
  2235  #ifndef CYBOZU_DONT_USE_EXCEPTION
  2236  inline void initPairing(const mcl::CurveParam& cp = mcl::BN254, fp::Mode mode = fp::FP_AUTO)
  2237  {
  2238  	bool b;
  2239  	BN::init(&b, cp, mode);
  2240  	if (!b) throw cybozu::Exception("bn:initPairing");
  2241  }
  2242  #endif
  2243  
  2244  inline void initG1only(bool *pb, const mcl::EcParam& para)
  2245  {
  2246  	local::StaticVar<>::param.initG1only(pb, para);
  2247  	if (!*pb) return;
  2248  	G1::setMulArrayGLV(0);
  2249  	G2::setMulArrayGLV(0);
  2250  	Fp12::setPowArrayGLV(0);
  2251  	G1::setCompressedExpression();
  2252  	G2::setCompressedExpression();
  2253  }
  2254  
  2255  inline const G1& getG1basePoint()
  2256  {
  2257  	return local::StaticVar<>::param.basePoint;
  2258  }
  2259  
  2260  } } // mcl::bn
  2261