github.com/s1s1ty/go@v0.0.0-20180207192209-104445e3140f/src/crypto/elliptic/p256_s390x.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // +build s390x
     6  
     7  package elliptic
     8  
     9  import (
    10  	"crypto/subtle"
    11  	"math/big"
    12  )
    13  
// p256CurveFast is the Curve implementation that initP256Arch installs when
// the vector facility is available. It embeds the shared P-256 parameters and
// supplies Inverse, CombinedMult, ScalarBaseMult and ScalarMult on top of the
// assembly routines declared in this file.
type p256CurveFast struct {
	*CurveParams
}
    17  
    18  type p256Point struct {
    19  	x [32]byte
    20  	y [32]byte
    21  	z [32]byte
    22  }
    23  
// p256 is the P-256 Curve implementation selected by initP256Arch: the
// assembly-backed p256CurveFast when hasVX is true, the pure Go p256Curve
// otherwise.
//
// p256PreFast is the precomputed base-point table built by initTable. It is
// indexed as [window][multiple]: entry [i][j] holds (j+1)·2^(7i)·G in affine
// Montgomery form (the z coordinate of each entry is not used). It stays nil
// unless initTable has run.
var (
	p256        Curve
	p256PreFast *[37][64]p256Point
)
    28  
// hasVectorFacility reports whether the machine has the z/Architecture
// vector facility installed and enabled.
//
// The function has no Go body here; presumably it is provided by the
// accompanying assembly file (verify against p256_asm_s390x.s).
func hasVectorFacility() bool

// hasVX caches the result of hasVectorFacility at package initialization.
// initP256Arch reads it to choose the P-256 implementation.
var hasVX = hasVectorFacility()
    34  
    35  func initP256Arch() {
    36  	if hasVX {
    37  		p256 = p256CurveFast{p256Params}
    38  		initTable()
    39  		return
    40  	}
    41  
    42  	// No vector support, use pure Go implementation.
    43  	p256 = p256Curve{p256Params}
    44  	return
    45  }
    46  
// Params returns the curve parameters embedded in the receiver.
func (curve p256CurveFast) Params() *CurveParams {
	return curve.CurveParams
}
    50  
// Functions implemented in p256_asm_s390x.s

// p256MulAsm performs a Montgomery multiplication modulo the P-256 field
// prime p: res receives in1 × in2 × R^-1 mod p, with R = 2^256 mod p (see
// the comment on rr). Every caller in this file passes 32-byte big-endian
// slices, and res is frequently the same slice as one of the inputs.
//
//go:noescape
func p256MulAsm(res, in1, in2 []byte)
    56  
// p256Sqr sets res to the Montgomery square of in modulo the P-256 field
// prime. It is a convenience wrapper that multiplies in by itself with
// p256MulAsm, so res may be the same slice as in.
func p256Sqr(res, in []byte) {
	p256MulAsm(res, in, in)
}
    61  
// p256FromMont performs a Montgomery multiplication of in by 1, which takes
// the value out of the Montgomery domain, and stores the result in res.
// p256PointToAffine calls it in place (res == in) on 32-byte slices.
//
//go:noescape
func p256FromMont(res, in []byte)
    66  
// p256NegCond negates val in place if and only if cond == 1; otherwise val
// is left unchanged. Callers pass the sign returned by boothW5/boothW7.
// NOTE(review): presumably only the y coordinate is negated, without
// branching on cond; confirm against the assembly.
//
//go:noescape
func p256NegCond(val *p256Point, cond int)
    71  
// p256MovCond is a conditional move between points:
//
//	cond == 0: res <- b
//	otherwise: res <- a
//
// Callers in this file routinely pass res as one of a or b.
//
//go:noescape
func p256MovCond(res, a, b *p256Point, cond int)
    76  
// p256Select performs a constant time table access, loading the entry of
// table chosen by idx into point. p256ScalarMult calls it with its 16-entry
// table of multiples and a Booth digit in the range 0..16.
// NOTE(review): the digit appears to be 1-based (entry idx-1), with idx == 0
// selecting no entry; confirm against the assembly.
//
//go:noescape
func p256Select(point *p256Point, table []p256Point, idx int)
    81  
// p256SelectBase loads the entry of table chosen by idx into point. It is
// used by p256BaseMult with one 64-entry row of p256PreFast and a Booth
// digit produced by boothW7.
// NOTE(review): presumably constant time and 1-based like p256Select, and
// presumably writes only the x and y coordinates since the table's z is
// unused; confirm against the assembly.
//
//go:noescape
func p256SelectBase(point *p256Point, table []p256Point, idx int)
    84  
// p256OrdMul performs a Montgomery multiplication modulo Ord(G), the order n
// of the base point: res receives in1 × in2 × R^-1 mod n (see the comment in
// Inverse). Callers pass 32-byte big-endian slices, and res is frequently
// the same slice as an input.
//
//go:noescape
func p256OrdMul(res, in1, in2 []byte)
    89  
    90  // Montgomery square modulo Ord(G), repeated n times
    91  func p256OrdSqr(res, in []byte, n int) {
    92  	copy(res, in)
    93  	for i := 0; i < n; i += 1 {
    94  		p256OrdMul(res, res, res)
    95  	}
    96  }
    97  
// p256PointAddAffineAsm adds P1 and the affine point P2 and writes the
// result to P3. The integer arguments adjust the operation:
//
//	sign == 1: P2 is negated before the addition
//	sel == 0:  P3 = P1
//	zero == 0: P3 = P2
//
// p256BaseMult passes the Booth digit of the current window as sel and the
// bitwise OR of all earlier digits as zero, and uses the same point for P3
// and P1.
//
//go:noescape
func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
   105  
// p256PointAddAsm adds the points P1 and P2 and writes the result to P3.
// P3 may be the same point as P1.
//
// CombinedMult treats the returned int as a "points were equal" flag and
// substitutes a doubling of P1 for the sum when it is set; other callers
// ignore the return value. initTable avoids adding a point to itself and
// calls p256PointDoubleAsm in that case instead.
//
//go:noescape
func p256PointAddAsm(P3, P1, P2 *p256Point) int
   110  
// p256PointDoubleAsm doubles the point P1 and writes the result to P3.
// Callers frequently double in place (P3 == P1).
//
//go:noescape
func p256PointDoubleAsm(P3, P1 *p256Point)
   113  
   114  func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
   115  	if k.Cmp(p256Params.N) >= 0 {
   116  		// This should never happen.
   117  		reducedK := new(big.Int).Mod(k, p256Params.N)
   118  		k = reducedK
   119  	}
   120  
   121  	// table will store precomputed powers of x. The 32 bytes at index
   122  	// i store x^(i+1).
   123  	var table [15][32]byte
   124  
   125  	x := fromBig(k)
   126  	// This code operates in the Montgomery domain where R = 2^256 mod n
   127  	// and n is the order of the scalar field. (See initP256 for the
   128  	// value.) Elements in the Montgomery domain take the form a×R and
   129  	// multiplication of x and y in the calculates (x × y × R^-1) mod n. RR
   130  	// is R×R mod n thus the Montgomery multiplication x and RR gives x×R,
   131  	// i.e. converts x into the Montgomery domain. Stored in BigEndian form
   132  	RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
   133  		0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
   134  
   135  	p256OrdMul(table[0][:], x, RR)
   136  
   137  	// Prepare the table, no need in constant time access, because the
   138  	// power is not a secret. (Entry 0 is never used.)
   139  	for i := 2; i < 16; i += 2 {
   140  		p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
   141  		p256OrdMul(table[i][:], table[i-1][:], table[0][:])
   142  	}
   143  
   144  	copy(x, table[14][:]) // f
   145  
   146  	p256OrdSqr(x[0:32], x[0:32], 4)
   147  	p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
   148  	t := make([]byte, 32)
   149  	copy(t, x)
   150  
   151  	p256OrdSqr(x, x, 8)
   152  	p256OrdMul(x, x, t) // ffff
   153  	copy(t, x)
   154  
   155  	p256OrdSqr(x, x, 16)
   156  	p256OrdMul(x, x, t) // ffffffff
   157  	copy(t, x)
   158  
   159  	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
   160  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
   161  	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
   162  	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
   163  
   164  	// Remaining 32 windows
   165  	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
   166  		0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
   167  	for i := 0; i < 32; i++ {
   168  		p256OrdSqr(x, x, 4)
   169  		p256OrdMul(x, x, table[expLo[i]-1][:])
   170  	}
   171  
   172  	// Multiplying by one in the Montgomery domain converts a Montgomery
   173  	// value out of the domain.
   174  	one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   175  		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
   176  	p256OrdMul(x, x, one)
   177  
   178  	return new(big.Int).SetBytes(x)
   179  }
   180  
   181  // fromBig converts a *big.Int into a format used by this code.
   182  func fromBig(big *big.Int) []byte {
   183  	// This could be done a lot more efficiently...
   184  	res := big.Bytes()
   185  	if 32 == len(res) {
   186  		return res
   187  	}
   188  	t := make([]byte, 32)
   189  	offset := 32 - len(res)
   190  	for i := len(res) - 1; i >= 0; i-- {
   191  		t[i+offset] = res[i]
   192  	}
   193  	return t
   194  }
   195  
   196  // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
   197  // is equal or greater than the order of the group, it's reduced modulo that order.
   198  func p256GetMultiplier(in []byte) []byte {
   199  	n := new(big.Int).SetBytes(in)
   200  
   201  	if n.Cmp(p256Params.N) >= 0 {
   202  		n.Mod(n, p256Params.N)
   203  	}
   204  	return fromBig(n)
   205  }
   206  
// p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
// underlying field of the curve. (See initP256 for the value.) Thus rr here is
// R×R mod p, stored big-endian: Montgomery-multiplying a value by rr converts it
// into the Montgomery domain. See the comment in Inverse about how this is used.
var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}

// one is the value 1 in the Montgomery domain, i.e. R mod p, stored
// big-endian. It is copied into the z coordinate of points that are loaded
// from affine coordinates.
var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
   216  
   217  func maybeReduceModP(in *big.Int) *big.Int {
   218  	if in.Cmp(p256Params.P) < 0 {
   219  		return in
   220  	}
   221  	return new(big.Int).Mod(in, p256Params.P)
   222  }
   223  
   224  func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
   225  	var r1, r2 p256Point
   226  	scalarReduced := p256GetMultiplier(baseScalar)
   227  	r1IsInfinity := scalarIsZero(scalarReduced)
   228  	r1.p256BaseMult(scalarReduced)
   229  
   230  	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
   231  	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
   232  	copy(r2.z[:], one)
   233  	p256MulAsm(r2.x[:], r2.x[:], rr[:])
   234  	p256MulAsm(r2.y[:], r2.y[:], rr[:])
   235  
   236  	scalarReduced = p256GetMultiplier(scalar)
   237  	r2IsInfinity := scalarIsZero(scalarReduced)
   238  	r2.p256ScalarMult(p256GetMultiplier(scalar))
   239  
   240  	var sum, double p256Point
   241  	pointsEqual := p256PointAddAsm(&sum, &r1, &r2)
   242  	p256PointDoubleAsm(&double, &r1)
   243  	p256MovCond(&sum, &double, &sum, pointsEqual)
   244  	p256MovCond(&sum, &r1, &sum, r2IsInfinity)
   245  	p256MovCond(&sum, &r2, &sum, r1IsInfinity)
   246  	return sum.p256PointToAffine()
   247  }
   248  
   249  func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
   250  	var r p256Point
   251  	r.p256BaseMult(p256GetMultiplier(scalar))
   252  	return r.p256PointToAffine()
   253  }
   254  
   255  func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
   256  	var r p256Point
   257  	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
   258  	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
   259  	copy(r.z[:], one)
   260  	p256MulAsm(r.x[:], r.x[:], rr[:])
   261  	p256MulAsm(r.y[:], r.y[:], rr[:])
   262  	r.p256ScalarMult(p256GetMultiplier(scalar))
   263  	return r.p256PointToAffine()
   264  }
   265  
   266  // scalarIsZero returns 1 if scalar represents the zero value, and zero
   267  // otherwise.
   268  func scalarIsZero(scalar []byte) int {
   269  	b := byte(0)
   270  	for _, s := range scalar {
   271  		b |= s
   272  	}
   273  	return subtle.ConstantTimeByteEq(b, 0)
   274  }
   275  
   276  func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
   277  	zInv := make([]byte, 32)
   278  	zInvSq := make([]byte, 32)
   279  
   280  	p256Inverse(zInv, p.z[:])
   281  	p256Sqr(zInvSq, zInv)
   282  	p256MulAsm(zInv, zInv, zInvSq)
   283  
   284  	p256MulAsm(zInvSq, p.x[:], zInvSq)
   285  	p256MulAsm(zInv, p.y[:], zInv)
   286  
   287  	p256FromMont(zInvSq, zInvSq)
   288  	p256FromMont(zInv, zInv)
   289  
   290  	return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
   291  }
   292  
   293  // p256Inverse sets out to in^-1 mod p.
   294  func p256Inverse(out, in []byte) {
   295  	var stack [6 * 32]byte
   296  	p2 := stack[32*0 : 32*0+32]
   297  	p4 := stack[32*1 : 32*1+32]
   298  	p8 := stack[32*2 : 32*2+32]
   299  	p16 := stack[32*3 : 32*3+32]
   300  	p32 := stack[32*4 : 32*4+32]
   301  
   302  	p256Sqr(out, in)
   303  	p256MulAsm(p2, out, in) // 3*p
   304  
   305  	p256Sqr(out, p2)
   306  	p256Sqr(out, out)
   307  	p256MulAsm(p4, out, p2) // f*p
   308  
   309  	p256Sqr(out, p4)
   310  	p256Sqr(out, out)
   311  	p256Sqr(out, out)
   312  	p256Sqr(out, out)
   313  	p256MulAsm(p8, out, p4) // ff*p
   314  
   315  	p256Sqr(out, p8)
   316  
   317  	for i := 0; i < 7; i++ {
   318  		p256Sqr(out, out)
   319  	}
   320  	p256MulAsm(p16, out, p8) // ffff*p
   321  
   322  	p256Sqr(out, p16)
   323  	for i := 0; i < 15; i++ {
   324  		p256Sqr(out, out)
   325  	}
   326  	p256MulAsm(p32, out, p16) // ffffffff*p
   327  
   328  	p256Sqr(out, p32)
   329  
   330  	for i := 0; i < 31; i++ {
   331  		p256Sqr(out, out)
   332  	}
   333  	p256MulAsm(out, out, in)
   334  
   335  	for i := 0; i < 32*4; i++ {
   336  		p256Sqr(out, out)
   337  	}
   338  	p256MulAsm(out, out, p32)
   339  
   340  	for i := 0; i < 32; i++ {
   341  		p256Sqr(out, out)
   342  	}
   343  	p256MulAsm(out, out, p32)
   344  
   345  	for i := 0; i < 16; i++ {
   346  		p256Sqr(out, out)
   347  	}
   348  	p256MulAsm(out, out, p16)
   349  
   350  	for i := 0; i < 8; i++ {
   351  		p256Sqr(out, out)
   352  	}
   353  	p256MulAsm(out, out, p8)
   354  
   355  	p256Sqr(out, out)
   356  	p256Sqr(out, out)
   357  	p256Sqr(out, out)
   358  	p256Sqr(out, out)
   359  	p256MulAsm(out, out, p4)
   360  
   361  	p256Sqr(out, out)
   362  	p256Sqr(out, out)
   363  	p256MulAsm(out, out, p2)
   364  
   365  	p256Sqr(out, out)
   366  	p256Sqr(out, out)
   367  	p256MulAsm(out, out, in)
   368  }
   369  
   370  func boothW5(in uint) (int, int) {
   371  	var s uint = ^((in >> 5) - 1)
   372  	var d uint = (1 << 6) - in - 1
   373  	d = (d & s) | (in & (^s))
   374  	d = (d >> 1) + (d & 1)
   375  	return int(d), int(s & 1)
   376  }
   377  
   378  func boothW7(in uint) (int, int) {
   379  	var s uint = ^((in >> 7) - 1)
   380  	var d uint = (1 << 8) - in - 1
   381  	d = (d & s) | (in & (^s))
   382  	d = (d >> 1) + (d & 1)
   383  	return int(d), int(s & 1)
   384  }
   385  
   386  func initTable() {
   387  	p256PreFast = new([37][64]p256Point) //z coordinate not used
   388  	basePoint := p256Point{
   389  		x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
   390  			0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
   391  		y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
   392  			0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
   393  		z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
   394  			0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
   395  	}
   396  
   397  	t1 := new(p256Point)
   398  	t2 := new(p256Point)
   399  	*t2 = basePoint
   400  
   401  	zInv := make([]byte, 32)
   402  	zInvSq := make([]byte, 32)
   403  	for j := 0; j < 64; j++ {
   404  		*t1 = *t2
   405  		for i := 0; i < 37; i++ {
   406  			// The window size is 7 so we need to double 7 times.
   407  			if i != 0 {
   408  				for k := 0; k < 7; k++ {
   409  					p256PointDoubleAsm(t1, t1)
   410  				}
   411  			}
   412  			// Convert the point to affine form. (Its values are
   413  			// still in Montgomery form however.)
   414  			p256Inverse(zInv, t1.z[:])
   415  			p256Sqr(zInvSq, zInv)
   416  			p256MulAsm(zInv, zInv, zInvSq)
   417  
   418  			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
   419  			p256MulAsm(t1.y[:], t1.y[:], zInv)
   420  
   421  			copy(t1.z[:], basePoint.z[:])
   422  			// Update the table entry
   423  			copy(p256PreFast[i][j].x[:], t1.x[:])
   424  			copy(p256PreFast[i][j].y[:], t1.y[:])
   425  		}
   426  		if j == 0 {
   427  			p256PointDoubleAsm(t2, &basePoint)
   428  		} else {
   429  			p256PointAddAsm(t2, t2, &basePoint)
   430  		}
   431  	}
   432  }
   433  
   434  func (p *p256Point) p256BaseMult(scalar []byte) {
   435  	wvalue := (uint(scalar[31]) << 1) & 0xff
   436  	sel, sign := boothW7(uint(wvalue))
   437  	p256SelectBase(p, p256PreFast[0][:], sel)
   438  	p256NegCond(p, sign)
   439  
   440  	copy(p.z[:], one[:])
   441  	var t0 p256Point
   442  
   443  	copy(t0.z[:], one[:])
   444  
   445  	index := uint(6)
   446  	zero := sel
   447  
   448  	for i := 1; i < 37; i++ {
   449  		if index < 247 {
   450  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
   451  		} else {
   452  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
   453  		}
   454  		index += 7
   455  		sel, sign = boothW7(uint(wvalue))
   456  		p256SelectBase(&t0, p256PreFast[i][:], sel)
   457  		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
   458  		zero |= sel
   459  	}
   460  }
   461  
   462  func (p *p256Point) p256ScalarMult(scalar []byte) {
   463  	// precomp is a table of precomputed points that stores powers of p
   464  	// from p^1 to p^16.
   465  	var precomp [16]p256Point
   466  	var t0, t1, t2, t3 p256Point
   467  
   468  	// Prepare the table
   469  	*&precomp[0] = *p
   470  
   471  	p256PointDoubleAsm(&t0, p)
   472  	p256PointDoubleAsm(&t1, &t0)
   473  	p256PointDoubleAsm(&t2, &t1)
   474  	p256PointDoubleAsm(&t3, &t2)
   475  	*&precomp[1] = t0  // 2
   476  	*&precomp[3] = t1  // 4
   477  	*&precomp[7] = t2  // 8
   478  	*&precomp[15] = t3 // 16
   479  
   480  	p256PointAddAsm(&t0, &t0, p)
   481  	p256PointAddAsm(&t1, &t1, p)
   482  	p256PointAddAsm(&t2, &t2, p)
   483  	*&precomp[2] = t0 // 3
   484  	*&precomp[4] = t1 // 5
   485  	*&precomp[8] = t2 // 9
   486  
   487  	p256PointDoubleAsm(&t0, &t0)
   488  	p256PointDoubleAsm(&t1, &t1)
   489  	*&precomp[5] = t0 // 6
   490  	*&precomp[9] = t1 // 10
   491  
   492  	p256PointAddAsm(&t2, &t0, p)
   493  	p256PointAddAsm(&t1, &t1, p)
   494  	*&precomp[6] = t2  // 7
   495  	*&precomp[10] = t1 // 11
   496  
   497  	p256PointDoubleAsm(&t0, &t0)
   498  	p256PointDoubleAsm(&t2, &t2)
   499  	*&precomp[11] = t0 // 12
   500  	*&precomp[13] = t2 // 14
   501  
   502  	p256PointAddAsm(&t0, &t0, p)
   503  	p256PointAddAsm(&t2, &t2, p)
   504  	*&precomp[12] = t0 // 13
   505  	*&precomp[14] = t2 // 15
   506  
   507  	// Start scanning the window from top bit
   508  	index := uint(254)
   509  	var sel, sign int
   510  
   511  	wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   512  	sel, _ = boothW5(uint(wvalue))
   513  	p256Select(p, precomp[:], sel)
   514  	zero := sel
   515  
   516  	for index > 4 {
   517  		index -= 5
   518  		p256PointDoubleAsm(p, p)
   519  		p256PointDoubleAsm(p, p)
   520  		p256PointDoubleAsm(p, p)
   521  		p256PointDoubleAsm(p, p)
   522  		p256PointDoubleAsm(p, p)
   523  
   524  		if index < 247 {
   525  			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
   526  		} else {
   527  			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
   528  		}
   529  
   530  		sel, sign = boothW5(uint(wvalue))
   531  
   532  		p256Select(&t0, precomp[:], sel)
   533  		p256NegCond(&t0, sign)
   534  		p256PointAddAsm(&t1, p, &t0)
   535  		p256MovCond(&t1, &t1, p, sel)
   536  		p256MovCond(p, &t1, &t0, zero)
   537  		zero |= sel
   538  	}
   539  
   540  	p256PointDoubleAsm(p, p)
   541  	p256PointDoubleAsm(p, p)
   542  	p256PointDoubleAsm(p, p)
   543  	p256PointDoubleAsm(p, p)
   544  	p256PointDoubleAsm(p, p)
   545  
   546  	wvalue = (uint(scalar[31]) << 1) & 0x3f
   547  	sel, sign = boothW5(uint(wvalue))
   548  
   549  	p256Select(&t0, precomp[:], sel)
   550  	p256NegCond(&t0, sign)
   551  	p256PointAddAsm(&t1, p, &t0)
   552  	p256MovCond(&t1, &t1, p, sel)
   553  	p256MovCond(p, &t1, &t0, zero)
   554  }