github.com/platonnetwork/platon-go@v0.7.6/cases/tool/win/bls_win/include/mcl/fp_tower.hpp (about)

     1  #pragma once
     2  /**
     3  	@file
     4  	@brief finite field extension class
     5  	@author MITSUNARI Shigeo(@herumi)
     6  	@license modified new BSD license
     7  	http://opensource.org/licenses/BSD-3-Clause
     8  */
     9  #include <mcl/fp.hpp>
    10  
    11  namespace mcl {
    12  
    13  template<class Fp>
    14  class FpDblT : public fp::Serializable<FpDblT<Fp> > {
    15  	typedef fp::Unit Unit;
    16  	Unit v_[Fp::maxSize * 2];
    17  public:
    18  	static size_t getUnitSize() { return Fp::op_.N * 2; }
    19  	FpDblT() : v_()
    20  	{
    21  	}
    22  	FpDblT(const FpDblT& rhs)
    23  	{
    24  		const size_t n = getUnitSize();
    25  		for (size_t i = 0; i < n; i++) {
    26  			v_[i] = rhs.v_[i];
    27  		}
    28  	}
    29  	void dump() const
    30  	{
    31  		const size_t n = getUnitSize();
    32  		for (size_t i = 0; i < n; i++) {
    33  			mcl::fp::dumpUnit(v_[n - 1 - i]);
    34  		}
    35  		printf("\n");
    36  	}
    37  	template<class OutputStream>
    38  	void save(bool *pb, OutputStream& os, int) const
    39  	{
    40  		char buf[1024];
    41  		size_t n = mcl::fp::arrayToHex(buf, sizeof(buf), v_, getUnitSize());
    42  		if (n == 0) {
    43  			*pb = false;
    44  			return;
    45  		}
    46  		cybozu::write(pb, os, buf + sizeof(buf) - n, sizeof(buf));
    47  	}
    48  	template<class InputStream>
    49  	void load(bool *pb, InputStream& is, int)
    50  	{
    51  		char buf[1024];
    52  		*pb = false;
    53  		size_t n = fp::local::loadWord(buf, sizeof(buf), is);
    54  		if (n == 0) return;
    55  		n = fp::hexToArray(v_, getUnitSize(), buf, n);
    56  		if (n == 0) return;
    57  		for (size_t i = n; i < getUnitSize(); i++) v_[i] = 0;
    58  		*pb = true;
    59  	}
    60  #ifndef CYBOZU_DONT_USE_EXCEPTION
    61  	template<class OutputStream>
    62  	void save(OutputStream& os, int ioMode = IoSerialize) const
    63  	{
    64  		bool b;
    65  		save(&b, os, ioMode);
    66  		if (!b) throw cybozu::Exception("FpDblT:save") << ioMode;
    67  	}
    68  	template<class InputStream>
    69  	void load(InputStream& is, int ioMode = IoSerialize)
    70  	{
    71  		bool b;
    72  		load(&b, is, ioMode);
    73  		if (!b) throw cybozu::Exception("FpDblT:load") << ioMode;
    74  	}
    75  	void getMpz(mpz_class& x) const
    76  	{
    77  		bool b;
    78  		getMpz(&b, x);
    79  		if (!b) throw cybozu::Exception("FpDblT:getMpz");
    80  	}
    81  	mpz_class getMpz() const
    82  	{
    83  		mpz_class x;
    84  		getMpz(x);
    85  		return x;
    86  	}
    87  #endif
    88  	void clear()
    89  	{
    90  		const size_t n = getUnitSize();
    91  		for (size_t i = 0; i < n; i++) {
    92  			v_[i] = 0;
    93  		}
    94  	}
    95  	FpDblT& operator=(const FpDblT& rhs)
    96  	{
    97  		const size_t n = getUnitSize();
    98  		for (size_t i = 0; i < n; i++) {
    99  			v_[i] = rhs.v_[i];
   100  		}
   101  		return *this;
   102  	}
   103  	// QQQ : does not check range of x strictly(use for debug)
   104  	void setMpz(const mpz_class& x)
   105  	{
   106  		assert(x >= 0);
   107  		const size_t xn = gmp::getUnitSize(x);
   108  		const size_t N2 = getUnitSize();
   109  		if (xn > N2) {
   110  			assert(0);
   111  			return;
   112  		}
   113  		memcpy(v_, gmp::getUnit(x), xn * sizeof(Unit));
   114  		memset(v_ + xn, 0, (N2 - xn) * sizeof(Unit));
   115  	}
   116  	void getMpz(bool *pb, mpz_class& x) const
   117  	{
   118  		gmp::setArray(pb, x, v_, Fp::op_.N * 2);
   119  	}
   120  #ifdef MCL_XBYAK_DIRECT_CALL
   121  	static void (*add)(FpDblT& z, const FpDblT& x, const FpDblT& y);
   122  	static void (*sub)(FpDblT& z, const FpDblT& x, const FpDblT& y);
   123  	static void (*mod)(Fp& z, const FpDblT& xy);
   124  	static void (*addPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
   125  	static void (*subPre)(FpDblT& z, const FpDblT& x, const FpDblT& y);
   126  	static void addC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
   127  	static void subC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
   128  	static void modC(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
   129  	static void addPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
   130  	static void subPreC(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
   131  #else
   132  	static void add(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_add(z.v_, x.v_, y.v_, Fp::op_.p); }
   133  	static void sub(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_sub(z.v_, x.v_, y.v_, Fp::op_.p); }
   134  	static void mod(Fp& z, const FpDblT& xy) { Fp::op_.fpDbl_mod(z.v_, xy.v_, Fp::op_.p); }
   135  	static void addPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_addPre(z.v_, x.v_, y.v_); }
   136  	static void subPre(FpDblT& z, const FpDblT& x, const FpDblT& y) { Fp::op_.fpDbl_subPre(z.v_, x.v_, y.v_); }
   137  #endif
   138  	static void mulPreC(FpDblT& xy, const Fp& x, const Fp& y) { Fp::op_.fpDbl_mulPre(xy.v_, x.v_, y.v_); }
   139  	static void sqrPreC(FpDblT& xx, const Fp& x) { Fp::op_.fpDbl_sqrPre(xx.v_, x.v_); }
   140  	/*
   141  		mul(z, x, y) = mulPre(xy, x, y) + mod(z, xy)
   142  	*/
   143  	static void (*mulPre)(FpDblT& xy, const Fp& x, const Fp& y);
   144  	static void (*sqrPre)(FpDblT& xx, const Fp& x);
   145  	static void mulUnit(FpDblT& z, const FpDblT& x, Unit y)
   146  	{
   147  		if (mulSmallUnit(z, x, y)) return;
   148  		assert(0); // not supported y
   149  	}
   150  	static void init()
   151  	{
   152  		const mcl::fp::Op& op = Fp::getOp();
   153  #ifdef MCL_XBYAK_DIRECT_CALL
   154  		add = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_addA_);
   155  		if (add == 0) add = addC;
   156  		sub = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subA_);
   157  		if (sub == 0) sub = subC;
   158  		mod = fp::func_ptr_cast<void (*)(Fp&, const FpDblT&)>(op.fpDbl_modA_);
   159  		if (mod == 0) mod = modC;
   160  		addPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_addPre);
   161  		if (addPre == 0) addPre = addPreC;
   162  		subPre = fp::func_ptr_cast<void (*)(FpDblT&, const FpDblT&, const FpDblT&)>(op.fpDbl_subPre);
   163  		if (subPre == 0) subPre = subPreC;
   164  #endif
   165  		if (op.fpDbl_mulPreA_) {
   166  			mulPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&, const Fp&)>(op.fpDbl_mulPreA_);
   167  		} else {
   168  			mulPre = mulPreC;
   169  		}
   170  		if (op.fpDbl_sqrPreA_) {
   171  			sqrPre = fp::func_ptr_cast<void (*)(FpDblT&, const Fp&)>(op.fpDbl_sqrPreA_);
   172  		} else {
   173  			sqrPre = sqrPreC;
   174  		}
   175  	}
   176  	void operator+=(const FpDblT& x) { add(*this, *this, x); }
   177  	void operator-=(const FpDblT& x) { sub(*this, *this, x); }
   178  };
   179  
   180  #ifdef MCL_XBYAK_DIRECT_CALL
   181  template<class Fp> void (*FpDblT<Fp>::add)(FpDblT&, const FpDblT&, const FpDblT&);
   182  template<class Fp> void (*FpDblT<Fp>::sub)(FpDblT&, const FpDblT&, const FpDblT&);
   183  template<class Fp> void (*FpDblT<Fp>::mod)(Fp&, const FpDblT&);
   184  template<class Fp> void (*FpDblT<Fp>::addPre)(FpDblT&, const FpDblT&, const FpDblT&);
   185  template<class Fp> void (*FpDblT<Fp>::subPre)(FpDblT&, const FpDblT&, const FpDblT&);
   186  #endif
   187  template<class Fp> void (*FpDblT<Fp>::mulPre)(FpDblT&, const Fp&, const Fp&);
   188  template<class Fp> void (*FpDblT<Fp>::sqrPre)(FpDblT&, const Fp&);
   189  
   190  template<class Fp> struct Fp12T;
   191  template<class Fp> class BNT;
   192  template<class Fp> struct Fp2DblT;
   193  /*
   194  	beta = -1
   195  	Fp2 = F[i] / (i^2 + 1)
   196  	x = a + bi
   197  */
   198  template<class _Fp>
   199  class Fp2T : public fp::Serializable<Fp2T<_Fp>,
   200  	fp::Operator<Fp2T<_Fp> > > {
   201  	typedef _Fp Fp;
   202  	typedef fp::Unit Unit;
   203  	typedef FpDblT<Fp> FpDbl;
   204  	typedef Fp2DblT<Fp> Fp2Dbl;
   205  	static const size_t gN = 5;
   206  	/*
   207  		g = xi^((p - 1) / 6)
   208  		g[] = { g^2, g^4, g^1, g^3, g^5 }
   209  	*/
   210  	static Fp2T g[gN];
   211  	static Fp2T g2[gN];
   212  	static Fp2T g3[gN];
   213  public:
   214  	static const Fp2T *get_gTbl() { return &g[0]; }
   215  	static const Fp2T *get_g2Tbl() { return &g2[0]; }
   216  	static const Fp2T *get_g3Tbl() { return &g3[0]; }
   217  	typedef typename Fp::BaseFp BaseFp;
   218  	static const size_t maxSize = Fp::maxSize * 2;
   219  	static inline size_t getByteSize() { return Fp::getByteSize() * 2; }
   220  	void dump() const
   221  	{
   222  		a.dump();
   223  		b.dump();
   224  	}
   225  	Fp a, b;
   226  	Fp2T() { }
   227  	Fp2T(int64_t a) : a(a), b(0) { }
   228  	Fp2T(const Fp& a, const Fp& b) : a(a), b(b) { }
   229  	Fp2T(int64_t a, int64_t b) : a(a), b(b) { }
   230  	Fp* getFp0() { return &a; }
   231  	const Fp* getFp0() const { return &a; }
   232  	const Unit* getUnit() const { return a.getUnit(); }
   233  	void clear()
   234  	{
   235  		a.clear();
   236  		b.clear();
   237  	}
   238  	void set(const Fp &a_, const Fp &b_)
   239  	{
   240  		a = a_;
   241  		b = b_;
   242  	}
   243  #ifdef MCL_XBYAK_DIRECT_CALL
   244  	static void (*add)(Fp2T& z, const Fp2T& x, const Fp2T& y);
   245  	static void (*sub)(Fp2T& z, const Fp2T& x, const Fp2T& y);
   246  	static void (*neg)(Fp2T& y, const Fp2T& x);
   247  	static void (*mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
   248  	static void (*sqr)(Fp2T& y, const Fp2T& x);
   249  #else
   250  	static void add(Fp2T& z, const Fp2T& x, const Fp2T& y) { addC(z, x, y); }
   251  	static void sub(Fp2T& z, const Fp2T& x, const Fp2T& y) { subC(z, x, y); }
   252  	static void neg(Fp2T& y, const Fp2T& x) { negC(y, x); }
   253  	static void mul(Fp2T& z, const Fp2T& x, const Fp2T& y) { mulC(z, x, y); }
   254  	static void sqr(Fp2T& y, const Fp2T& x) { sqrC(y, x); }
   255  #endif
   256  	static void (*mul_xi)(Fp2T& y, const Fp2T& x);
   257  	static void addPre(Fp2T& z, const Fp2T& x, const Fp2T& y) { Fp::addPre(z.a, x.a, y.a); Fp::addPre(z.b, x.b, y.b); }
   258  	static void inv(Fp2T& y, const Fp2T& x) { Fp::op_.fp2_inv(y.a.v_, x.a.v_); }
   259  	static void divBy2(Fp2T& y, const Fp2T& x)
   260  	{
   261  		Fp::divBy2(y.a, x.a);
   262  		Fp::divBy2(y.b, x.b);
   263  	}
   264  	static void divBy4(Fp2T& y, const Fp2T& x)
   265  	{
   266  		Fp::divBy4(y.a, x.a);
   267  		Fp::divBy4(y.b, x.b);
   268  	}
   269  	static void mulFp(Fp2T& z, const Fp2T& x, const Fp& y)
   270  	{
   271  		Fp::mul(z.a, x.a, y);
   272  		Fp::mul(z.b, x.b, y);
   273  	}
   274  	template<class S>
   275  	void setArray(bool *pb, const S *buf, size_t n)
   276  	{
   277  		assert((n & 1) == 0);
   278  		n /= 2;
   279  		a.setArray(pb, buf, n);
   280  		if (!*pb) return;
   281  		b.setArray(pb, buf + n, n);
   282  	}
   283  	template<class InputStream>
   284  	void load(bool *pb, InputStream& is, int ioMode)
   285  	{
   286  		Fp *ap = &a, *bp = &b;
   287  		if (Fp::isETHserialization_ && ioMode & (IoSerialize | IoSerializeHexStr)) {
   288  			fp::swap_(ap, bp);
   289  		}
   290  		ap->load(pb, is, ioMode);
   291  		if (!*pb) return;
   292  		bp->load(pb, is, ioMode);
   293  	}
   294  	/*
   295  		Fp2T = <a> + ' ' + <b>
   296  	*/
   297  	template<class OutputStream>
   298  	void save(bool *pb, OutputStream& os, int ioMode) const
   299  	{
   300  		const Fp *ap = &a, *bp = &b;
   301  		if (Fp::isETHserialization_ && ioMode & (IoSerialize | IoSerializeHexStr)) {
   302  			fp::swap_(ap, bp);
   303  		}
   304  		const char sep = *fp::getIoSeparator(ioMode);
   305  		ap->save(pb, os, ioMode);
   306  		if (!*pb) return;
   307  		if (sep) {
   308  			cybozu::writeChar(pb, os, sep);
   309  			if (!*pb) return;
   310  		}
   311  		bp->save(pb, os, ioMode);
   312  	}
   313  	bool isZero() const { return a.isZero() && b.isZero(); }
   314  	bool isOne() const { return a.isOne() && b.isZero(); }
   315  	bool operator==(const Fp2T& rhs) const { return a == rhs.a && b == rhs.b; }
   316  	bool operator!=(const Fp2T& rhs) const { return !operator==(rhs); }
   317  	/*
   318  		return true is a is odd (do not consider b)
   319  		this function is for only compressed reprezentation of EC
   320  		isOdd() is not good naming. QQQ
   321  	*/
   322  	bool isOdd() const { return a.isOdd(); }
   323  	/*
   324  		(a + bi)^2 = (a^2 - b^2) + 2ab i = c + di
   325  		A = a^2
   326  		B = b^2
   327  		A = (c +/- sqrt(c^2 + d^2))/2
   328  		b = d / 2a
   329  	*/
   330  	static inline bool squareRoot(Fp2T& y, const Fp2T& x)
   331  	{
   332  		Fp t1, t2;
   333  		if (x.b.isZero()) {
   334  			if (Fp::squareRoot(t1, x.a)) {
   335  				y.a = t1;
   336  				y.b.clear();
   337  			} else {
   338  				bool b = Fp::squareRoot(t1, -x.a);
   339  				assert(b); (void)b;
   340  				y.a.clear();
   341  				y.b = t1;
   342  			}
   343  			return true;
   344  		}
   345  		Fp::sqr(t1, x.a);
   346  		Fp::sqr(t2, x.b);
   347  		t1 += t2; // c^2 + d^2
   348  		if (!Fp::squareRoot(t1, t1)) return false;
   349  		Fp::add(t2, x.a, t1);
   350  		Fp::divBy2(t2, t2);
   351  		if (!Fp::squareRoot(t2, t2)) {
   352  			Fp::sub(t2, x.a, t1);
   353  			Fp::divBy2(t2, t2);
   354  			bool b = Fp::squareRoot(t2, t2);
   355  			assert(b); (void)b;
   356  		}
   357  		y.a = t2;
   358  		t2 += t2;
   359  		Fp::inv(t2, t2);
   360  		Fp::mul(y.b, x.b, t2);
   361  		return true;
   362  	}
   363  	static void inline norm(Fp& y, const Fp2T& x)
   364  	{
   365  		Fp aa, bb;
   366  		Fp::sqr(aa, x.a);
   367  		Fp::sqr(bb, x.b);
   368  		Fp::add(y, aa, bb);
   369  	}
   370  	/*
   371  		Frobenius
   372  		i^2 = -1
   373  		(a + bi)^p = a + bi^p in Fp
   374  		= a + bi if p = 1 mod 4
   375  		= a - bi if p = 3 mod 4
   376  	*/
   377  	static void Frobenius(Fp2T& y, const Fp2T& x)
   378  	{
   379  		if (Fp::getOp().pmod4 == 1) {
   380  			if (&y != &x) {
   381  				y = x;
   382  			}
   383  		} else {
   384  			if (&y != &x) {
   385  				y.a = x.a;
   386  			}
   387  			Fp::neg(y.b, x.b);
   388  		}
   389  	}
   390  
   391  	static uint32_t get_xi_a() { return Fp::getOp().xi_a; }
   392  	static void init()
   393  	{
   394  //		assert(Fp::maxSize <= 256);
   395  		mcl::fp::Op& op = Fp::op_;
   396  		assert(op.xi_a);
   397  		mul_xi = 0;
   398  #ifdef MCL_XBYAK_DIRECT_CALL
   399  		add = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_addA_);
   400  		if (add == 0) add = addC;
   401  		sub = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_subA_);
   402  		if (sub == 0) sub = subC;
   403  		neg = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_negA_);
   404  		if (neg == 0) neg = negC;
   405  		mul = fp::func_ptr_cast<void (*)(Fp2T& z, const Fp2T& x, const Fp2T& y)>(op.fp2_mulA_);
   406  		if (mul == 0) mul = mulC;
   407  		sqr = fp::func_ptr_cast<void (*)(Fp2T& y, const Fp2T& x)>(op.fp2_sqrA_);
   408  		if (sqr == 0) sqr = sqrC;
   409  		mul_xi = fp::func_ptr_cast<void (*)(Fp2T&, const Fp2T&)>(op.fp2_mul_xiA_);
   410  #endif
   411  		op.fp2_inv = fp2_invW;
   412  		if (mul_xi == 0) {
   413  			if (op.xi_a == 1) {
   414  				mul_xi = fp2_mul_xi_1_1iC;
   415  			} else {
   416  				mul_xi = fp2_mul_xiC;
   417  			}
   418  		}
   419  		FpDblT<Fp>::init();
   420  		Fp2DblT<Fp>::init();
   421  		// call init before Fp2::pow because FpDbl is used in Fp2T
   422  		const Fp2T xi(op.xi_a, 1);
   423  		const mpz_class& p = Fp::getOp().mp;
   424  		Fp2T::pow(g[0], xi, (p - 1) / 6); // g = xi^((p-1)/6)
   425  		for (size_t i = 1; i < gN; i++) {
   426  			g[i] = g[i - 1] * g[0];
   427  		}
   428  		/*
   429  			permutate [0, 1, 2, 3, 4] => [1, 3, 0, 2, 4]
   430  			g[0] = g^2
   431  			g[1] = g^4
   432  			g[2] = g^1
   433  			g[3] = g^3
   434  			g[4] = g^5
   435  		*/
   436  		{
   437  			Fp2T t = g[0];
   438  			g[0] = g[1];
   439  			g[1] = g[3];
   440  			g[3] = g[2];
   441  			g[2] = t;
   442  		}
   443  		for (size_t i = 0; i < gN; i++) {
   444  			Fp2T t(g[i].a, g[i].b);
   445  			if (Fp::getOp().pmod4 == 3) Fp::neg(t.b, t.b);
   446  			Fp2T::mul(g2[i], t, g[i]);
   447  			g3[i] = g[i] * g2[i];
   448  		}
   449  	}
   450  #ifndef CYBOZU_DONT_USE_EXCEPTION
   451  	template<class InputStream>
   452  	void load(InputStream& is, int ioMode = IoSerialize)
   453  	{
   454  		bool b;
   455  		load(&b, is, ioMode);
   456  		if (!b) throw cybozu::Exception("Fp2T:load");
   457  	}
   458  	template<class OutputStream>
   459  	void save(OutputStream& os, int ioMode = IoSerialize) const
   460  	{
   461  		bool b;
   462  		save(&b, os, ioMode);
   463  		if (!b) throw cybozu::Exception("Fp2T:save");
   464  	}
   465  	template<class S>
   466  	void setArray(const S *buf, size_t n)
   467  	{
   468  		bool b;
   469  		setArray(&b, buf, n);
   470  		if (!b) throw cybozu::Exception("Fp2T:setArray");
   471  	}
   472  #endif
   473  #ifndef CYBOZU_DONT_USE_STRING
   474  	Fp2T(const std::string& a, const std::string& b, int base = 0) : a(a, base), b(b, base) {}
   475  	friend std::istream& operator>>(std::istream& is, Fp2T& self)
   476  	{
   477  		self.load(is, fp::detectIoMode(Fp::BaseFp::getIoMode(), is));
   478  		return is;
   479  	}
   480  	friend std::ostream& operator<<(std::ostream& os, const Fp2T& self)
   481  	{
   482  		self.save(os, fp::detectIoMode(Fp::BaseFp::getIoMode(), os));
   483  		return os;
   484  	}
   485  #endif
   486  private:
   487  	/*
   488  		default Fp2T operator
   489  		Fp2T = Fp[i]/(i^2 + 1)
   490  	*/
   491  	static void addC(Fp2T& z, const Fp2T& x, const Fp2T& y)
   492  	{
   493  		Fp::add(z.a, x.a, y.a);
   494  		Fp::add(z.b, x.b, y.b);
   495  	}
   496  	static void subC(Fp2T& z, const Fp2T& x, const Fp2T& y)
   497  	{
   498  		Fp::sub(z.a, x.a, y.a);
   499  		Fp::sub(z.b, x.b, y.b);
   500  	}
   501  	static void negC(Fp2T& y, const Fp2T& x)
   502  	{
   503  		Fp::neg(y.a, x.a);
   504  		Fp::neg(y.b, x.b);
   505  	}
   506  #if 0
   507  	/*
   508  		x = a + bi, y = c + di, i^2 = -1
   509  		z = xy = (a + bi)(c + di) = (ac - bd) + (ad + bc)i
   510  		ad+bc = (a + b)(c + d) - ac - bd
   511  		# of mod = 3
   512  	*/
   513  	static void fp2_mulW(Unit *z, const Unit *x, const Unit *y)
   514  	{
   515  		const Fp *px = reinterpret_cast<const Fp*>(x);
   516  		const Fp *py = reinterpret_cast<const Fp*>(y);
   517  		const Fp& a = px[0];
   518  		const Fp& b = px[1];
   519  		const Fp& c = py[0];
   520  		const Fp& d = py[1];
   521  		Fp *pz = reinterpret_cast<Fp*>(z);
   522  		Fp t1, t2, ac, bd;
   523  		Fp::add(t1, a, b);
   524  		Fp::add(t2, c, d);
   525  		t1 *= t2; // (a + b)(c + d)
   526  		Fp::mul(ac, a, c);
   527  		Fp::mul(bd, b, d);
   528  		Fp::sub(pz[0], ac, bd); // ac - bd
   529  		Fp::sub(pz[1], t1, ac);
   530  		pz[1] -= bd;
   531  	}
   532  	static void fp2_mulNFW(Fp2T& z, const Fp2T& x, const Fp2T& y)
   533  	{
   534  		const fp::Op& op = Fp::op_;
   535  		op.fp2_mulNF((Unit*)&z, (const Unit*)&x, (const Unit*)&y, op.p);
   536  	}
   537  #endif
   538  	static void mulC(Fp2T& z, const Fp2T& x, const Fp2T& y)
   539  	{
   540  		Fp2Dbl d;
   541  		Fp2Dbl::mulPre(d, x, y);
   542  		FpDbl::mod(z.a, d.a);
   543  		FpDbl::mod(z.b, d.b);
   544  	}
   545  	/*
   546  		x = a + bi, i^2 = -1
   547  		y = x^2 = (a + bi)^2 = (a + b)(a - b) + 2abi
   548  	*/
   549  	static void sqrC(Fp2T& y, const Fp2T& x)
   550  	{
   551  		const Fp& a = x.a;
   552  		const Fp& b = x.b;
   553  #if 1 // faster than using FpDbl
   554  		Fp t1, t2, t3;
   555  		Fp::add(t1, b, b); // 2b
   556  		t1 *= a; // 2ab
   557  		Fp::add(t2, a, b); // a + b
   558  		Fp::sub(t3, a, b); // a - b
   559  		Fp::mul(y.a, t2, t3); // (a + b)(a - b)
   560  		y.b = t1;
   561  #else
   562  		Fp t1, t2;
   563  		FpDbl d1, d2;
   564  		Fp::addPre(t1, b, b); // 2b
   565  		FpDbl::mulPre(d2, t1, a); // 2ab
   566  		Fp::addPre(t1, a, b); // a + b
   567  		Fp::sub(t2, a, b); // a - b
   568  		FpDbl::mulPre(d1, t1, t2); // (a + b)(a - b)
   569  		FpDbl::mod(py[0], d1);
   570  		FpDbl::mod(py[1], d2);
   571  #endif
   572  	}
   573  	/*
   574  		xi = xi_a + i
   575  		x = a + bi
   576  		y = (a + bi)xi = (a + bi)(xi_a + i)
   577  		=(a * x_ia - b) + (a + b xi_a)i
   578  	*/
   579  	static void fp2_mul_xiC(Fp2T& y, const Fp2T& x)
   580  	{
   581  		const Fp& a = x.a;
   582  		const Fp& b = x.b;
   583  		Fp t;
   584  		Fp::mulUnit(t, a, Fp::getOp().xi_a);
   585  		t -= b;
   586  		Fp::mulUnit(y.b, b, Fp::getOp().xi_a);
   587  		y.b += a;
   588  		y.a = t;
   589  	}
   590  	/*
   591  		xi = 1 + i ; xi_a = 1
   592  		y = (a + bi)xi = (a - b) + (a + b)i
   593  	*/
   594  	static void fp2_mul_xi_1_1iC(Fp2T& y, const Fp2T& x)
   595  	{
   596  		const Fp& a = x.a;
   597  		const Fp& b = x.b;
   598  		Fp t;
   599  		Fp::add(t, a, b);
   600  		Fp::sub(y.a, a, b);
   601  		y.b = t;
   602  	}
   603  	/*
   604  		x = a + bi
   605  		1 / x = (a - bi) / (a^2 + b^2)
   606  	*/
   607  	static void fp2_invW(Unit *y, const Unit *x)
   608  	{
   609  		const Fp *px = reinterpret_cast<const Fp*>(x);
   610  		Fp *py = reinterpret_cast<Fp*>(y);
   611  		const Fp& a = px[0];
   612  		const Fp& b = px[1];
   613  		Fp aa, bb;
   614  		Fp::sqr(aa, a);
   615  		Fp::sqr(bb, b);
   616  		aa += bb;
   617  		Fp::inv(aa, aa); // aa = 1 / (a^2 + b^2)
   618  		Fp::mul(py[0], a, aa);
   619  		Fp::mul(py[1], b, aa);
   620  		Fp::neg(py[1], py[1]);
   621  	}
   622  };
   623  
   624  #ifdef MCL_XBYAK_DIRECT_CALL
   625  template<class Fp_> void (*Fp2T<Fp_>::add)(Fp2T& z, const Fp2T& x, const Fp2T& y);
   626  template<class Fp_> void (*Fp2T<Fp_>::sub)(Fp2T& z, const Fp2T& x, const Fp2T& y);
   627  template<class Fp_> void (*Fp2T<Fp_>::neg)(Fp2T& y, const Fp2T& x);
   628  template<class Fp_> void (*Fp2T<Fp_>::mul)(Fp2T& z, const Fp2T& x, const Fp2T& y);
   629  template<class Fp_> void (*Fp2T<Fp_>::sqr)(Fp2T& y, const Fp2T& x);
   630  #endif
   631  template<class Fp_> void (*Fp2T<Fp_>::mul_xi)(Fp2T& y, const Fp2T& x);
   632  
   633  template<class Fp>
   634  struct Fp2DblT {
   635  	typedef FpDblT<Fp> FpDbl;
   636  	typedef Fp2T<Fp> Fp2;
   637  	typedef fp::Unit Unit;
   638  	FpDbl a, b;
   639  	static void add(Fp2DblT& z, const Fp2DblT& x, const Fp2DblT& y)
   640  	{
   641  		FpDbl::add(z.a, x.a, y.a);
   642  		FpDbl::add(z.b, x.b, y.b);
   643  	}
   644  	static void addPre(Fp2DblT& z, const Fp2DblT& x, const Fp2DblT& y)
   645  	{
   646  		FpDbl::addPre(z.a, x.a, y.a);
   647  		FpDbl::addPre(z.b, x.b, y.b);
   648  	}
   649  	static void sub(Fp2DblT& z, const Fp2DblT& x, const Fp2DblT& y)
   650  	{
   651  		FpDbl::sub(z.a, x.a, y.a);
   652  		FpDbl::sub(z.b, x.b, y.b);
   653  	}
   654  	static void subPre(Fp2DblT& z, const Fp2DblT& x, const Fp2DblT& y)
   655  	{
   656  		FpDbl::subPre(z.a, x.a, y.a);
   657  		FpDbl::subPre(z.b, x.b, y.b);
   658  	}
   659  	static void neg(Fp2DblT& y, const Fp2DblT& x)
   660  	{
   661  		FpDbl::neg(y.a, x.a);
   662  		FpDbl::neg(y.b, x.b);
   663  	}
   664  	static void mul_xi(Fp2DblT& y, const Fp2DblT& x)
   665  	{
   666  		const uint32_t xi_a = Fp2::get_xi_a();
   667  		if (xi_a == 1) {
   668  			FpDbl t;
   669  			FpDbl::add(t, x.a, x.b);
   670  			FpDbl::sub(y.a, x.a, x.b);
   671  			y.b = t;
   672  		} else {
   673  			FpDbl t;
   674  			FpDbl::mulUnit(t, x.a, xi_a);
   675  			FpDbl::sub(t, t, x.b);
   676  			FpDbl::mulUnit(y.b, x.b, xi_a);
   677  			FpDbl::add(y.b, y.b, x.a);
   678  			y.a = t;
   679  		}
   680  	}
   681  	static void (*mulPre)(Fp2DblT&, const Fp2&, const Fp2&);
   682  	static void (*sqrPre)(Fp2DblT&, const Fp2&);
   683  	static void mod(Fp2& y, const Fp2DblT& x)
   684  	{
   685  		FpDbl::mod(y.a, x.a);
   686  		FpDbl::mod(y.b, x.b);
   687  	}
   688  #ifndef CYBOZU_DONT_USE_STRING
   689  	friend std::ostream& operator<<(std::ostream& os, const Fp2DblT& x)
   690  	{
   691  		return os << x.a << ' ' << x.b;
   692  	}
   693  #endif
   694  	void operator+=(const Fp2DblT& x) { add(*this, *this, x); }
   695  	void operator-=(const Fp2DblT& x) { sub(*this, *this, x); }
   696  	static void init()
   697   	{
   698  		const mcl::fp::Op& op = Fp::getOp();
   699  		if (op.fp2Dbl_mulPreA_) {
   700  			mulPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&, const Fp2&)>(op.fp2Dbl_mulPreA_);
   701  		} else {
   702  			if (op.isFullBit) {
   703  				mulPre = fp2Dbl_mulPreW<true>;
   704  			} else {
   705  				mulPre = fp2Dbl_mulPreW<false>;
   706  			}
   707  		}
   708  		if (op.fp2Dbl_sqrPreA_) {
   709  			sqrPre = fp::func_ptr_cast<void (*)(Fp2DblT&, const Fp2&)>(op.fp2Dbl_sqrPreA_);
   710  		} else {
   711  			if (op.isFullBit) {
   712  				sqrPre = fp2Dbl_sqrPreW<true>;
   713  			} else {
   714  				sqrPre = fp2Dbl_sqrPreW<false>;
   715  			}
   716  		}
   717  	}
   718  	/*
   719  		Fp2Dbl::mulPre by FpDblT
   720  		@note mod of NIST_P192 is fast
   721  	*/
   722  	template<bool isFullBit>
   723  	static void fp2Dbl_mulPreW(Fp2DblT& z, const Fp2& x, const Fp2& y)
   724  	{
   725  		const Fp& a = x.a;
   726  		const Fp& b = x.b;
   727  		const Fp& c = y.a;
   728  		const Fp& d = y.b;
   729  		FpDbl& d0 = z.a;
   730  		FpDbl& d1 = z.b;
   731  		FpDbl d2;
   732  		Fp s, t;
   733  		if (isFullBit) {
   734  			Fp::add(s, a, b);
   735  			Fp::add(t, c, d);
   736  		} else {
   737  			Fp::addPre(s, a, b);
   738  			Fp::addPre(t, c, d);
   739  		}
   740  		FpDbl::mulPre(d1, s, t); // (a + b)(c + d)
   741  		FpDbl::mulPre(d0, a, c);
   742  		FpDbl::mulPre(d2, b, d);
   743  		if (isFullBit) {
   744  			FpDbl::sub(d1, d1, d0); // (a + b)(c + d) - ac
   745  			FpDbl::sub(d1, d1, d2); // (a + b)(c + d) - ac - bd
   746  		} else {
   747  			FpDbl::subPre(d1, d1, d0);
   748  			FpDbl::subPre(d1, d1, d2);
   749  		}
   750  		FpDbl::sub(d0, d0, d2); // ac - bd
   751  	}
   752  	template<bool isFullBit>
   753  	static void fp2Dbl_sqrPreW(Fp2DblT& y, const Fp2& x)
   754  	{
   755  		Fp t1, t2;
   756  		if (isFullBit) {
   757  			Fp::add(t1, x.b, x.b); // 2b
   758  			Fp::add(t2, x.a, x.b); // a + b
   759  		} else {
   760  			Fp::addPre(t1, x.b, x.b); // 2b
   761  			Fp::addPre(t2, x.a, x.b); // a + b
   762  		}
   763  		FpDbl::mulPre(y.b, t1, x.a); // 2ab
   764  		Fp::sub(t1, x.a, x.b); // a - b
   765  		FpDbl::mulPre(y.a, t1, t2); // (a + b)(a - b)
   766  	}
   767  };
   768  
   769  template<class Fp> void (*Fp2DblT<Fp>::mulPre)(Fp2DblT&, const Fp2T<Fp>&, const Fp2T<Fp>&);
   770  template<class Fp> void (*Fp2DblT<Fp>::sqrPre)(Fp2DblT&, const Fp2T<Fp>&);
   771  
   772  template<class Fp> Fp2T<Fp> Fp2T<Fp>::g[Fp2T<Fp>::gN];
   773  template<class Fp> Fp2T<Fp> Fp2T<Fp>::g2[Fp2T<Fp>::gN];
   774  template<class Fp> Fp2T<Fp> Fp2T<Fp>::g3[Fp2T<Fp>::gN];
   775  
   776  template<class Fp>
   777  struct Fp6DblT;
   778  /*
   779  	Fp6T = Fp2[v] / (v^3 - xi)
   780  	x = a + b v + c v^2
   781  */
   782  template<class _Fp>
   783  struct Fp6T : public fp::Serializable<Fp6T<_Fp>,
   784  	fp::Operator<Fp6T<_Fp> > > {
   785  	typedef _Fp Fp;
   786  	typedef Fp2T<Fp> Fp2;
   787  	typedef Fp2DblT<Fp> Fp2Dbl;
   788  	typedef Fp6DblT<Fp> Fp6Dbl;
   789  	typedef Fp BaseFp;
   790  	Fp2 a, b, c;
   791  	Fp6T() { }
   792  	Fp6T(int64_t a) : a(a) , b(0) , c(0) { }
   793  	Fp6T(const Fp2& a, const Fp2& b, const Fp2& c) : a(a) , b(b) , c(c) { }
   794  	void clear()
   795  	{
   796  		a.clear();
   797  		b.clear();
   798  		c.clear();
   799  	}
   800  	Fp* getFp0() { return a.getFp0(); }
   801  	const Fp* getFp0() const { return a.getFp0(); }
   802  	Fp2* getFp2() { return &a; }
   803  	const Fp2* getFp2() const { return &a; }
   804  	void set(const Fp2 &a_, const Fp2 &b_, const Fp2 &c_)
   805  	{
   806  		a = a_;
   807  		b = b_;
   808  		c = c_;
   809  	}
   810  	bool isZero() const
   811  	{
   812  		return a.isZero() && b.isZero() && c.isZero();
   813  	}
   814  	bool isOne() const
   815  	{
   816  		return a.isOne() && b.isZero() && c.isZero();
   817  	}
   818  	bool operator==(const Fp6T& rhs) const
   819  	{
   820  		return a == rhs.a && b == rhs.b && c == rhs.c;
   821  	}
   822  	bool operator!=(const Fp6T& rhs) const { return !operator==(rhs); }
   823  	template<class InputStream>
   824  	void load(bool *pb, InputStream& is, int ioMode)
   825  	{
   826  		a.load(pb, is, ioMode); if (!*pb) return;
   827  		b.load(pb, is, ioMode); if (!*pb) return;
   828  		c.load(pb, is, ioMode); if (!*pb) return;
   829  	}
   830  	template<class OutputStream>
   831  	void save(bool *pb, OutputStream& os, int ioMode) const
   832  	{
   833  		const char sep = *fp::getIoSeparator(ioMode);
   834  		a.save(pb, os, ioMode); if (!*pb) return;
   835  		if (sep) {
   836  			cybozu::writeChar(pb, os, sep);
   837  			if (!*pb) return;
   838  		}
   839  		b.save(pb, os, ioMode); if (!*pb) return;
   840  		if (sep) {
   841  			cybozu::writeChar(pb, os, sep);
   842  			if (!*pb) return;
   843  		}
   844  		c.save(pb, os, ioMode);
   845  	}
   846  #ifndef CYBOZU_DONT_USE_EXCEPTION
   847  	template<class InputStream>
   848  	void load(InputStream& is, int ioMode = IoSerialize)
   849  	{
   850  		bool b;
   851  		load(&b, is, ioMode);
   852  		if (!b) throw cybozu::Exception("Fp6T:load");
   853  	}
   854  	template<class OutputStream>
   855  	void save(OutputStream& os, int ioMode = IoSerialize) const
   856  	{
   857  		bool b;
   858  		save(&b, os, ioMode);
   859  		if (!b) throw cybozu::Exception("Fp6T:save");
   860  	}
   861  #endif
   862  #ifndef CYBOZU_DONT_USE_STRING
   863  	friend std::istream& operator>>(std::istream& is, Fp6T& self)
   864  	{
   865  		self.load(is, fp::detectIoMode(Fp::BaseFp::getIoMode(), is));
   866  		return is;
   867  	}
   868  	friend std::ostream& operator<<(std::ostream& os, const Fp6T& self)
   869  	{
   870  		self.save(os, fp::detectIoMode(Fp::BaseFp::getIoMode(), os));
   871  		return os;
   872  	}
   873  #endif
   874  	static void add(Fp6T& z, const Fp6T& x, const Fp6T& y)
   875  	{
   876  		Fp2::add(z.a, x.a, y.a);
   877  		Fp2::add(z.b, x.b, y.b);
   878  		Fp2::add(z.c, x.c, y.c);
   879  	}
   880  	static void sub(Fp6T& z, const Fp6T& x, const Fp6T& y)
   881  	{
   882  		Fp2::sub(z.a, x.a, y.a);
   883  		Fp2::sub(z.b, x.b, y.b);
   884  		Fp2::sub(z.c, x.c, y.c);
   885  	}
   886  	static void neg(Fp6T& y, const Fp6T& x)
   887  	{
   888  		Fp2::neg(y.a, x.a);
   889  		Fp2::neg(y.b, x.b);
   890  		Fp2::neg(y.c, x.c);
   891  	}
   892  	/*
   893  		x = a + bv + cv^2, v^3 = xi
   894  		x^2 = (a^2 + 2bc xi) + (c^2 xi + 2ab)v + (b^2 + 2ac)v^2
   895  
   896  		b^2 + 2ac = (a + b + c)^2 - a^2 - 2bc - c^2 - 2ab
   897  	*/
   898  	static void sqr(Fp6T& y, const Fp6T& x)
   899  	{
   900  		Fp2 t1, t2, t3;
   901  		Fp2::mul(t1, x.a, x.b);
   902  		t1 += t1; // 2ab
   903  		Fp2::mul(t2, x.b, x.c);
   904  		t2 += t2; // 2bc
   905  		Fp2::sqr(t3, x.c); // c^2
   906  		Fp2::add(y.c, x.a, x.c); // a + c, destroy y.c
   907  		y.c += x.b; // a + b + c
   908  		Fp2::sqr(y.b, y.c); // (a + b + c)^2, destroy y.b
   909  		y.b -= t2; // (a + b + c)^2 - 2bc
   910  		Fp2::mul_xi(t2, t2); // 2bc xi
   911  		Fp2::sqr(y.a, x.a); // a^2, destroy y.a
   912  		y.b -= y.a; // (a + b + c)^2 - 2bc - a^2
   913  		y.a += t2; // a^2 + 2bc xi
   914  		Fp2::sub(y.c, y.b, t3); // (a + b + c)^2 - 2bc - a^2 - c^2
   915  		Fp2::mul_xi(y.b, t3); // c^2 xi
   916  		y.b += t1; // c^2 xi + 2ab
   917  		y.c -= t1; // b^2 + 2ac
   918  	}
   919  	static inline void mul(Fp6T& z, const Fp6T& x, const Fp6T& y);
   920  	/*
   921  		x = a + bv + cv^2, v^3 = xi
   922  		y = 1/x = p/q where
   923  		p = (a^2 - bc xi) + (c^2 xi - ab)v + (b^2 - ac)v^2
   924  		q = c^3 xi^2 + b(b^2 - 3ac)xi + a^3
   925  		  = (a^2 - bc xi)a + ((c^2 xi - ab)c + (b^2 - ac)b) xi
   926  	*/
   927  	static void inv(Fp6T& y, const Fp6T& x)
   928  	{
   929  		const Fp2& a = x.a;
   930  		const Fp2& b = x.b;
   931  		const Fp2& c = x.c;
   932  		Fp2 aa, bb, cc, ab, bc, ac;
   933  		Fp2::sqr(aa, a);
   934  		Fp2::sqr(bb, b);
   935  		Fp2::sqr(cc, c);
   936  		Fp2::mul(ab, a, b);
   937  		Fp2::mul(bc, b, c);
   938  		Fp2::mul(ac, c, a);
   939  
   940  		Fp6T p;
   941  		Fp2::mul_xi(p.a, bc);
   942  		Fp2::sub(p.a, aa, p.a); // a^2 - bc xi
   943  		Fp2::mul_xi(p.b, cc);
   944  		p.b -= ab; // c^2 xi - ab
   945  		Fp2::sub(p.c, bb, ac); // b^2 - ac
   946  		Fp2 q, t;
   947  		Fp2::mul(q, p.b, c);
   948  		Fp2::mul(t, p.c, b);
   949  		q += t;
   950  		Fp2::mul_xi(q, q);
   951  		Fp2::mul(t, p.a, a);
   952  		q += t;
   953  		Fp2::inv(q, q);
   954  
   955  		Fp2::mul(y.a, p.a, q);
   956  		Fp2::mul(y.b, p.b, q);
   957  		Fp2::mul(y.c, p.c, q);
   958  	}
   959  };
   960  
   961  template<class Fp>
   962  struct Fp6DblT {
   963  	typedef Fp2T<Fp> Fp2;
   964  	typedef Fp6T<Fp> Fp6;
   965  	typedef Fp2DblT<Fp> Fp2Dbl;
   966  	typedef Fp6DblT<Fp> Fp6Dbl;
   967  	typedef fp::Unit Unit;
   968  	Fp2Dbl a, b, c;
   969  	static void add(Fp6Dbl& z, const Fp6Dbl& x, const Fp6Dbl& y)
   970  	{
   971  		Fp2Dbl::add(z.a, x.a, y.a);
   972  		Fp2Dbl::add(z.b, x.b, y.b);
   973  		Fp2Dbl::add(z.c, x.c, y.c);
   974  	}
   975  	static void sub(Fp6Dbl& z, const Fp6Dbl& x, const Fp6Dbl& y)
   976  	{
   977  		Fp2Dbl::sub(z.a, x.a, y.a);
   978  		Fp2Dbl::sub(z.b, x.b, y.b);
   979  		Fp2Dbl::sub(z.c, x.c, y.c);
   980  	}
   981  	/*
   982  		x = a + bv + cv^2, y = d + ev + fv^2, v^3 = xi
   983  		xy = (ad + (bf + ce)xi) + ((ae + bd) + cf xi)v + ((af + cd) + be)v^2
   984  		bf + ce = (b + c)(e + f) - be - cf
   985  		ae + bd = (a + b)(e + d) - ad - be
   986  		af + cd = (a + c)(d + f) - ad - cf
   987  	*/
   988  	static void mulPre(Fp6DblT& z, const Fp6& x, const Fp6& y)
   989  	{
   990  //clk.begin();
   991  		const Fp2& a = x.a;
   992  		const Fp2& b = x.b;
   993  		const Fp2& c = x.c;
   994  		const Fp2& d = y.a;
   995  		const Fp2& e = y.b;
   996  		const Fp2& f = y.c;
   997  		Fp2Dbl& za = z.a;
   998  		Fp2Dbl& zb = z.b;
   999  		Fp2Dbl& zc = z.c;
  1000  		Fp2Dbl BE;
  1001  		Fp2Dbl::mulPre(za, a, d);
  1002  		Fp2Dbl::mulPre(BE, b, e);
  1003  		Fp2Dbl::mulPre(zb, c, f);
  1004  
  1005  		Fp2 t1, t2, t3, t4;
  1006  		Fp2::add(t1, b, c);
  1007  		Fp2::add(t2, e, f);
  1008  		Fp2Dbl T1;
  1009  		Fp2Dbl::mulPre(T1, t1, t2);
  1010  		Fp2Dbl::sub(T1, T1, BE);
  1011  		Fp2Dbl::sub(T1, T1, zb);
  1012  		Fp2Dbl::mul_xi(T1, T1);
  1013  
  1014  		Fp2::add(t2, a, b);
  1015  		Fp2::add(t3, e, d);
  1016  		Fp2Dbl T2;
  1017  		Fp2Dbl::mulPre(T2, t2, t3);
  1018  		Fp2Dbl::sub(T2, T2, za);
  1019  		Fp2Dbl::sub(T2, T2, BE);
  1020  
  1021  		Fp2::add(t3, a, c);
  1022  		Fp2::add(t4, d, f);
  1023  		Fp2Dbl::mulPre(zc, t3, t4);
  1024  		Fp2Dbl::sub(zc, zc, za);
  1025  		Fp2Dbl::sub(zc, zc, zb);
  1026  
  1027  		Fp2Dbl::add(za, za, T1);
  1028  		Fp2Dbl::mul_xi(zb, zb);
  1029  		Fp2Dbl::add(zb, zb, T2);
  1030  		Fp2Dbl::add(zc, zc, BE);
  1031  //clk.end();
  1032  	}
  1033  	static void mod(Fp6& y, const Fp6Dbl& x)
  1034  	{
  1035  		Fp2Dbl::mod(y.a, x.a);
  1036  		Fp2Dbl::mod(y.b, x.b);
  1037  		Fp2Dbl::mod(y.c, x.c);
  1038  	}
  1039  };
  1040  
  1041  template<class Fp>
  1042  inline void Fp6T<Fp>::mul(Fp6T<Fp>& z, const Fp6T<Fp>& x, const Fp6T<Fp>& y)
  1043  {
  1044  	Fp6DblT<Fp> Z;
  1045  	Fp6DblT<Fp>::mulPre(Z, x, y);
  1046  	Fp6DblT<Fp>::mod(z, Z);
  1047  }
  1048  
  1049  /*
  1050  	Fp12T = Fp6[w] / (w^2 - v)
  1051  	x = a + b w
  1052  */
  1053  template<class Fp>
  1054  struct Fp12T : public fp::Serializable<Fp12T<Fp>,
  1055  	fp::Operator<Fp12T<Fp> > > {
  1056  	typedef Fp2T<Fp> Fp2;
  1057  	typedef Fp6T<Fp> Fp6;
  1058  	typedef Fp2DblT<Fp> Fp2Dbl;
  1059  	typedef Fp6DblT<Fp> Fp6Dbl;
  1060  	typedef Fp BaseFp;
  1061  	Fp6 a, b;
  1062  	Fp12T() {}
  1063  	Fp12T(int64_t a) : a(a), b(0) {}
  1064  	Fp12T(const Fp6& a, const Fp6& b) : a(a), b(b) {}
  1065  	void clear()
  1066  	{
  1067  		a.clear();
  1068  		b.clear();
  1069  	}
  1070  	void setOne()
  1071  	{
  1072  		clear();
  1073  		a.a.a = 1;
  1074  	}
  1075  
  1076  	Fp* getFp0() { return a.getFp0(); }
  1077  	const Fp* getFp0() const { return a.getFp0(); }
  1078  	Fp2* getFp2() { return a.getFp2(); }
  1079  	const Fp2* getFp2() const { return a.getFp2(); }
  1080  	void set(const Fp2& v0, const Fp2& v1, const Fp2& v2, const Fp2& v3, const Fp2& v4, const Fp2& v5)
  1081  	{
  1082  		a.set(v0, v1, v2);
  1083  		b.set(v3, v4, v5);
  1084  	}
  1085  
  1086  	bool isZero() const
  1087  	{
  1088  		return a.isZero() && b.isZero();
  1089  	}
  1090  	bool isOne() const
  1091  	{
  1092  		return a.isOne() && b.isZero();
  1093  	}
  1094  	bool operator==(const Fp12T& rhs) const
  1095  	{
  1096  		return a == rhs.a && b == rhs.b;
  1097  	}
  1098  	bool operator!=(const Fp12T& rhs) const { return !operator==(rhs); }
  1099  	static void add(Fp12T& z, const Fp12T& x, const Fp12T& y)
  1100  	{
  1101  		Fp6::add(z.a, x.a, y.a);
  1102  		Fp6::add(z.b, x.b, y.b);
  1103  	}
  1104  	static void sub(Fp12T& z, const Fp12T& x, const Fp12T& y)
  1105  	{
  1106  		Fp6::sub(z.a, x.a, y.a);
  1107  		Fp6::sub(z.b, x.b, y.b);
  1108  	}
  1109  	static void neg(Fp12T& z, const Fp12T& x)
  1110  	{
  1111  		Fp6::neg(z.a, x.a);
  1112  		Fp6::neg(z.b, x.b);
  1113  	}
  1114  	/*
  1115  		z = x v + y
  1116  		in Fp6 : (a + bv + cv^2)v = cv^3 + av + bv^2 = cxi + av + bv^2
  1117  	*/
  1118  	static void mulVadd(Fp6& z, const Fp6& x, const Fp6& y)
  1119  	{
  1120  		Fp2 t;
  1121  		Fp2::mul_xi(t, x.c);
  1122  		Fp2::add(z.c, x.b, y.c);
  1123  		Fp2::add(z.b, x.a, y.b);
  1124  		Fp2::add(z.a, t, y.a);
  1125  	}
  1126  	static void mulVadd(Fp6Dbl& z, const Fp6Dbl& x, const Fp6Dbl& y)
  1127  	{
  1128  		Fp2Dbl t;
  1129  		Fp2Dbl::mul_xi(t, x.c);
  1130  		Fp2Dbl::add(z.c, x.b, y.c);
  1131  		Fp2Dbl::add(z.b, x.a, y.b);
  1132  		Fp2Dbl::add(z.a, t, y.a);
  1133  	}
  1134  	/*
  1135  		x = a + bw, y = c + dw, w^2 = v
  1136  		z = xy = (a + bw)(c + dw) = (ac + bdv) + (ad + bc)w
  1137  		ad+bc = (a + b)(c + d) - ac - bd
  1138  
  1139  		in Fp6 : (a + bv + cv^2)v = cv^3 + av + bv^2 = cxi + av + bv^2
  1140  	*/
  1141  	static void mul(Fp12T& z, const Fp12T& x, const Fp12T& y)
  1142  	{
  1143  		// 4.7Kclk -> 4.55Kclk
  1144  		const Fp6& a = x.a;
  1145  		const Fp6& b = x.b;
  1146  		const Fp6& c = y.a;
  1147  		const Fp6& d = y.b;
  1148  		Fp6 t1, t2;
  1149  		Fp6::add(t1, a, b);
  1150  		Fp6::add(t2, c, d);
  1151  #if 1
  1152  		Fp6Dbl T, AC, BD;
  1153  		Fp6Dbl::mulPre(AC, a, c);
  1154  		Fp6Dbl::mulPre(BD, b, d);
  1155  		mulVadd(T, BD, AC);
  1156  		Fp6Dbl::mod(z.a, T);
  1157  		Fp6Dbl::mulPre(T, t1, t2); // (a + b)(c + d)
  1158  		Fp6Dbl::sub(T, T, AC);
  1159  		Fp6Dbl::sub(T, T, BD);
  1160  		Fp6Dbl::mod(z.b, T);
  1161  #else
  1162  		Fp6 ac, bd;
  1163  		t1 *= t2; // (a + b)(c + d)
  1164  		Fp6::mul(ac, a, c);
  1165  		Fp6::mul(bd, b, d);
  1166  		mulVadd(z.a, bd, ac);
  1167  		t1 -= ac;
  1168  		Fp6::sub(z.b, t1, bd);
  1169  #endif
  1170  	}
  1171  	/*
  1172  		x = a + bw, w^2 = v
  1173  		y = x^2 = (a + bw)^2 = (a^2 + b^2v) + 2abw
  1174  		a^2 + b^2v = (a + b)(bv + a) - (abv + ab)
  1175  	*/
  1176  	static void sqr(Fp12T& y, const Fp12T& x)
  1177  	{
  1178  		const Fp6& a = x.a;
  1179  		const Fp6& b = x.b;
  1180  		Fp6 t0, t1;
  1181  		Fp6::add(t0, a, b); // a + b
  1182  		mulVadd(t1, b, a); // bv + a
  1183  		t0 *= t1; // (a + b)(bv + a)
  1184  		Fp6::mul(t1, a, b); // ab
  1185  		Fp6::add(y.b, t1, t1); // 2ab
  1186  		mulVadd(y.a, t1, t1); // abv + ab
  1187  		Fp6::sub(y.a, t0, y.a);
  1188  	}
  1189  	/*
  1190  		x = a + bw, w^2 = v
  1191  		y = 1/x = (a - bw) / (a^2 - b^2v)
  1192  	*/
  1193  	static void inv(Fp12T& y, const Fp12T& x)
  1194  	{
  1195  		const Fp6& a = x.a;
  1196  		const Fp6& b = x.b;
  1197  		Fp6 t0, t1;
  1198  		Fp6::sqr(t0, a);
  1199  		Fp6::sqr(t1, b);
  1200  		Fp2::mul_xi(t1.c, t1.c);
  1201  		t0.a -= t1.c;
  1202  		t0.b -= t1.a;
  1203  		t0.c -= t1.b; // t0 = a^2 - b^2v
  1204  		Fp6::inv(t0, t0);
  1205  		Fp6::mul(y.a, x.a, t0);
  1206  		Fp6::mul(y.b, x.b, t0);
  1207  		Fp6::neg(y.b, y.b);
  1208  	}
  1209  	/*
  1210  		y = 1 / x = conjugate of x if |x| = 1
  1211  	*/
  1212  	static void unitaryInv(Fp12T& y, const Fp12T& x)
  1213  	{
  1214  		if (&y != &x) y.a = x.a;
  1215  		Fp6::neg(y.b, x.b);
  1216  	}
  1217  	/*
  1218  		Frobenius
  1219  		i^2 = -1
  1220  		(a + bi)^p = a + bi^p in Fp
  1221  		= a + bi if p = 1 mod 4
  1222  		= a - bi if p = 3 mod 4
  1223  
  1224  		g = xi^(p - 1) / 6
  1225  		v^3 = xi in Fp2
  1226  		v^p = ((v^6) ^ (p-1)/6) v = g^2 v
  1227  		v^2p = g^4 v^2
  1228  		(a + bv + cv^2)^p in Fp6
  1229  		= F(a) + F(b)g^2 v + F(c) g^4 v^2
  1230  
  1231  		w^p = ((w^6) ^ (p-1)/6) w = g w
  1232  		((a + bv + cv^2)w)^p in Fp12T
  1233  		= (F(a) g + F(b) g^3 v + F(c) g^5 v^2)w
  1234  	*/
  1235  	static void Frobenius(Fp12T& y, const Fp12T& x)
  1236  	{
  1237  		for (int i = 0; i < 6; i++) {
  1238  			Fp2::Frobenius(y.getFp2()[i], x.getFp2()[i]);
  1239  		}
  1240  		for (int i = 1; i < 6; i++) {
  1241  			y.getFp2()[i] *= Fp2::get_gTbl()[i - 1];
  1242  		}
  1243  	}
  1244  	static void Frobenius2(Fp12T& y, const Fp12T& x)
  1245  	{
  1246  #if 0
  1247  		Frobenius(y, x);
  1248  		Frobenius(y, y);
  1249  #else
  1250  		y.getFp2()[0] = x.getFp2()[0];
  1251  		if (Fp::getOp().pmod4 == 1) {
  1252  			for (int i = 1; i < 6; i++) {
  1253  				Fp2::mul(y.getFp2()[i], x.getFp2()[i], Fp2::get_g2Tbl()[i]);
  1254  			}
  1255  		} else {
  1256  			for (int i = 1; i < 6; i++) {
  1257  				Fp2::mulFp(y.getFp2()[i], x.getFp2()[i], Fp2::get_g2Tbl()[i - 1].a);
  1258  			}
  1259  		}
  1260  #endif
  1261  	}
  1262  	static void Frobenius3(Fp12T& y, const Fp12T& x)
  1263  	{
  1264  #if 0
  1265  		Frobenius(y, x);
  1266  		Frobenius(y, y);
  1267  		Frobenius(y, y);
  1268  #else
  1269  		Fp2::Frobenius(y.getFp2()[0], x.getFp2()[0]);
  1270  		for (int i = 1; i < 6; i++) {
  1271  			Fp2::Frobenius(y.getFp2()[i], x.getFp2()[i]);
  1272  			y.getFp2()[i] *= Fp2::get_g3Tbl()[i - 1];
  1273  		}
  1274  #endif
  1275  	}
  1276  	template<class InputStream>
  1277  	void load(bool *pb, InputStream& is, int ioMode)
  1278  	{
  1279  		a.load(pb, is, ioMode); if (!*pb) return;
  1280  		b.load(pb, is, ioMode);
  1281  	}
  1282  	template<class OutputStream>
  1283  	void save(bool *pb, OutputStream& os, int ioMode) const
  1284  	{
  1285  		const char sep = *fp::getIoSeparator(ioMode);
  1286  		a.save(pb, os, ioMode); if (!*pb) return;
  1287  		if (sep) {
  1288  			cybozu::writeChar(pb, os, sep);
  1289  			if (!*pb) return;
  1290  		}
  1291  		b.save(pb, os, ioMode);
  1292  	}
  1293  #ifndef CYBOZU_DONT_USE_EXCEPTION
  1294  	template<class InputStream>
  1295  	void load(InputStream& is, int ioMode = IoSerialize)
  1296  	{
  1297  		bool b;
  1298  		load(&b, is, ioMode);
  1299  		if (!b) throw cybozu::Exception("Fp12T:load");
  1300  	}
  1301  	template<class OutputStream>
  1302  	void save(OutputStream& os, int ioMode = IoSerialize) const
  1303  	{
  1304  		bool b;
  1305  		save(&b, os, ioMode);
  1306  		if (!b) throw cybozu::Exception("Fp12T:save");
  1307  	}
  1308  #endif
  1309  #ifndef CYBOZU_DONT_USE_STRING
  1310  	friend std::istream& operator>>(std::istream& is, Fp12T& self)
  1311  	{
  1312  		self.load(is, fp::detectIoMode(Fp::BaseFp::getIoMode(), is));
  1313  		return is;
  1314  	}
  1315  	friend std::ostream& operator<<(std::ostream& os, const Fp12T& self)
  1316  	{
  1317  		self.save(os, fp::detectIoMode(Fp::BaseFp::getIoMode(), os));
  1318  		return os;
  1319  	}
  1320  #endif
  1321  };
  1322  
  1323  /*
  1324  	convert multiplicative group to additive group
  1325  */
  1326  template<class T>
  1327  struct GroupMtoA : public T {
  1328  	static T& castT(GroupMtoA& x) { return static_cast<T&>(x); }
  1329  	static const T& castT(const GroupMtoA& x) { return static_cast<const T&>(x); }
  1330  	void clear()
  1331  	{
  1332  		castT(*this) = 1;
  1333  	}
  1334  	bool isZero() const { return castT(*this).isOne(); }
  1335  	static void add(GroupMtoA& z, const GroupMtoA& x, const GroupMtoA& y)
  1336  	{
  1337  		T::mul(castT(z), castT(x), castT(y));
  1338  	}
  1339  	static void dbl(GroupMtoA& y, const GroupMtoA& x)
  1340  	{
  1341  		T::sqr(castT(y), castT(x));
  1342  	}
  1343  	static void neg(GroupMtoA& y, const GroupMtoA& x)
  1344  	{
  1345  		// assume Fp12
  1346  		T::unitaryInv(castT(y), castT(x));
  1347  	}
  1348  	static void Frobenus(GroupMtoA& y, const GroupMtoA& x)
  1349  	{
  1350  		T::Frobenius(castT(y), castT(x));
  1351  	}
  1352  	template<class INT>
  1353  	static void mul(GroupMtoA& z, const GroupMtoA& x, const INT& y)
  1354  	{
  1355  		T::pow(castT(z), castT(x), y);
  1356  	}
  1357  	template<class INT>
  1358  	static void mulGeneric(GroupMtoA& z, const GroupMtoA& x, const INT& y)
  1359  	{
  1360  		T::powGeneric(castT(z), castT(x), y);
  1361  	}
  1362  	void operator+=(const GroupMtoA& rhs)
  1363  	{
  1364  		add(*this, *this, rhs);
  1365  	}
  1366  	void normalize() {}
  1367  private:
  1368  	bool isOne() const;
  1369  };
  1370  
  1371  } // mcl
  1372