github.com/piotrnar/gocoin@v0.0.0-20240512203912-faa0448c5e96/lib/secp256k1/field_5x52.go (about)

     1  // +build amd64 arm64 arm64be ppc64 ppc64le mips64 mips64le s390x sparc64
     2  
     3  package secp256k1
     4  
     5  import (
     6  	"math/bits"
     7  )
     8  
// FieldArch names the limb layout implemented by this file: five limbs of
// 52 bits each.
const FieldArch = "5x52"

// Field is a value of up to 256 bits held as five uint64 limbs, least
// significant limb first. After Normalize, n[0]..n[3] each hold 52 bits and
// n[4] holds the top 48 bits; the additive helpers (SetAdd, MulInt, Negate)
// do not carry between limbs, so limbs may temporarily exceed those widths.
type Field struct {
	n [5]uint64
}
    14  
    15  func (r *Field) SetB32(a []byte) {
    16  	r.n[0] = uint64(a[31]) | (uint64(a[30]) << 8) | (uint64(a[29]) << 16) |
    17  		(uint64(a[28]) << 24) | (uint64(a[27]) << 32) | (uint64(a[26]) << 40) | ((uint64(a[25]) & 0xF) << 48)
    18  
    19  	r.n[1] = ((uint64(a[25]) >> 4) & 0xF) | (uint64(a[24]) << 4) | (uint64(a[23]) << 12) | (uint64(a[22]) << 20) |
    20  		(uint64(a[21]) << 28) | (uint64(a[20]) << 36) | (uint64(a[19]) << 44)
    21  
    22  	r.n[2] = uint64(a[18]) | (uint64(a[17]) << 8) | (uint64(a[16]) << 16) | (uint64(a[15]) << 24) |
    23  		(uint64(a[14]) << 32) | (uint64(a[13]) << 40) | ((uint64(a[12]) & 0xF) << 48)
    24  
    25  	r.n[3] = ((uint64(a[12]) >> 4) & 0xF) | (uint64(a[11]) << 4) | (uint64(a[10]) << 12) |
    26  		(uint64(a[9]) << 20) | (uint64(a[8]) << 28) | (uint64(a[7]) << 36) | (uint64(a[6]) << 44)
    27  
    28  	r.n[4] = uint64(a[5]) | (uint64(a[4]) << 8) | (uint64(a[3]) << 16) | (uint64(a[2]) << 24) |
    29  		(uint64(a[1]) << 32) | (uint64(a[0]) << 40)
    30  }
    31  
    32  func (a *Field) IsZero() bool {
    33  	return (a.n[0] == 0 && a.n[1] == 0 && a.n[2] == 0 && a.n[3] == 0 && a.n[4] == 0)
    34  }
    35  
    36  func (r *Field) SetInt(a uint64) {
    37  	r.n[0] = a
    38  	r.n[1] = 0
    39  	r.n[2] = 0
    40  	r.n[3] = 0
    41  	r.n[4] = 0
    42  }
    43  
    44  func (r *Field) Normalize() {
    45  	t0 := r.n[0]
    46  	t1 := r.n[1]
    47  	t2 := r.n[2]
    48  	t3 := r.n[3]
    49  	t4 := r.n[4]
    50  
    51  	/* Reduce t4 at the start so there will be at most a single carry from the first pass */
    52  	var m uint64
    53  	x := t4 >> 48
    54  	t4 &= 0x0FFFFFFFFFFFF
    55  
    56  	/* The first pass ensures the magnitude is 1, ... */
    57  	t0 += x * 0x1000003D1
    58  	t1 += (t0 >> 52)
    59  	t0 &= 0xFFFFFFFFFFFFF
    60  	t2 += (t1 >> 52)
    61  	t1 &= 0xFFFFFFFFFFFFF
    62  	m = t1
    63  	t3 += (t2 >> 52)
    64  	t2 &= 0xFFFFFFFFFFFFF
    65  	m &= t2
    66  	t4 += (t3 >> 52)
    67  	t3 &= 0xFFFFFFFFFFFFF
    68  	m &= t3
    69  
    70  	/* At most a single final reduction is needed; check if the value is >= the field characteristic */
    71  	x = (t4 >> 48)
    72  	if (t4 == 0x0FFFFFFFFFFFF) && (m == 0xFFFFFFFFFFFFF) && (t0 >= 0xFFFFEFFFFFC2F) {
    73  		x |= 1
    74  	}
    75  
    76  	/* Apply the final reduction (for constant-time behaviour, we do it always) */
    77  	t0 += x * 0x1000003D1
    78  	t1 += (t0 >> 52)
    79  	t0 &= 0xFFFFFFFFFFFFF
    80  	t2 += (t1 >> 52)
    81  	t1 &= 0xFFFFFFFFFFFFF
    82  	t3 += (t2 >> 52)
    83  	t2 &= 0xFFFFFFFFFFFFF
    84  	t4 += (t3 >> 52)
    85  	t3 &= 0xFFFFFFFFFFFFF
    86  
    87  	/* Mask off the possible multiple of 2^256 from the final reduction */
    88  	t4 &= 0x0FFFFFFFFFFFF
    89  
    90  	r.n[0] = t0
    91  	r.n[1] = t1
    92  	r.n[2] = t2
    93  	r.n[3] = t3
    94  	r.n[4] = t4
    95  }
    96  
    97  func (a *Field) GetB32(r []byte) {
    98  	r[0] = byte(a.n[4] >> 40)
    99  	r[1] = byte(a.n[4] >> 32)
   100  	r[2] = byte(a.n[4] >> 24)
   101  	r[3] = byte(a.n[4] >> 16)
   102  	r[4] = byte(a.n[4] >> 8)
   103  	r[5] = byte(a.n[4])
   104  	r[6] = byte(a.n[3] >> 44)
   105  	r[7] = byte(a.n[3] >> 36)
   106  	r[8] = byte(a.n[3] >> 28)
   107  	r[9] = byte(a.n[3] >> 20)
   108  	r[10] = byte(a.n[3] >> 12)
   109  	r[11] = byte(a.n[3] >> 4)
   110  	r[12] = (byte(a.n[2]>>48) & 0xF) | (byte(a.n[3]&0xF) << 4)
   111  	r[13] = byte(a.n[2] >> 40)
   112  	r[14] = byte(a.n[2] >> 32)
   113  	r[15] = byte(a.n[2] >> 24)
   114  	r[16] = byte(a.n[2] >> 16)
   115  	r[17] = byte(a.n[2] >> 8)
   116  	r[18] = byte(a.n[2])
   117  	r[19] = byte(a.n[1] >> 44)
   118  	r[20] = byte(a.n[1] >> 36)
   119  	r[21] = byte(a.n[1] >> 28)
   120  	r[22] = byte(a.n[1] >> 20)
   121  	r[23] = byte(a.n[1] >> 12)
   122  	r[24] = byte(a.n[1] >> 4)
   123  	r[25] = (byte(a.n[0]>>48) & 0xF) | (byte(a.n[1]&0xF) << 4)
   124  	r[26] = byte(a.n[0] >> 40)
   125  	r[27] = byte(a.n[0] >> 32)
   126  	r[28] = byte(a.n[0] >> 24)
   127  	r[29] = byte(a.n[0] >> 16)
   128  	r[30] = byte(a.n[0] >> 8)
   129  	r[31] = byte(a.n[0])
   130  }
   131  
   132  func (a *Field) Equals(b *Field) bool {
   133  	return (a.n[0] == b.n[0] && a.n[1] == b.n[1] && a.n[2] == b.n[2] && a.n[3] == b.n[3] && a.n[4] == b.n[4])
   134  }
   135  
   136  func (r *Field) SetAdd(a *Field) {
   137  	r.n[0] += a.n[0]
   138  	r.n[1] += a.n[1]
   139  	r.n[2] += a.n[2]
   140  	r.n[3] += a.n[3]
   141  	r.n[4] += a.n[4]
   142  }
   143  
   144  func (r *Field) MulInt(a uint64) {
   145  	r.n[0] *= a
   146  	r.n[1] *= a
   147  	r.n[2] *= a
   148  	r.n[3] *= a
   149  	r.n[4] *= a
   150  }
   151  
   152  func (a *Field) Negate(r *Field, m uint64) {
   153  	r.n[0] = 0xFFFFEFFFFFC2F*2*(m+1) - a.n[0]
   154  	r.n[1] = 0xFFFFFFFFFFFFF*2*(m+1) - a.n[1]
   155  	r.n[2] = 0xFFFFFFFFFFFFF*2*(m+1) - a.n[2]
   156  	r.n[3] = 0xFFFFFFFFFFFFF*2*(m+1) - a.n[3]
   157  	r.n[4] = 0x0FFFFFFFFFFFF*2*(m+1) - a.n[4]
   158  }
   159  
   160  func (a *Field) Mul(r, b *Field) {
   161  	var c_lo, c_hi, d_lo, d_hi uint64
   162  	var t3, t4, tx, u0, rn0, rn1 uint64
   163  	var carry, him, lom uint64
   164  
   165  	a0 := a.n[0]
   166  	a1 := a.n[1]
   167  	a2 := a.n[2]
   168  	a3 := a.n[3]
   169  	a4 := a.n[4]
   170  
   171  	const M = 0xFFFFFFFFFFFFF
   172  	const R = 0x1000003D10
   173  
   174  	/*  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
   175  	 *  for 0 <= x <= 4, px is a shorthand for sum(a[i]*b[x-i], i=0..x).
   176  	 *  for 4 <= x <= 8, px is a shorthand for sum(a[i]*b[x-i], i=(x-4)..4)
   177  	 *  Note that [x 0 0 0 0 0] = [x*R].
   178  	 */
   179  
   180  	//d.AddMul64s(a0, b.n[3])
   181  	d_hi, d_lo = bits.Mul64(a0, b.n[3])
   182  
   183  	//d.AddMul64s(a1, b.n[2])
   184  	him, lom = bits.Mul64(a1, b.n[2])
   185  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   186  	d_hi, _ = bits.Add64(d_hi, him, carry)
   187  
   188  	//d.AddMul64s(a2, b.n[1])
   189  	him, lom = bits.Mul64(a2, b.n[1])
   190  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   191  	d_hi, _ = bits.Add64(d_hi, him, carry)
   192  
   193  	//d.AddMul64s(a3, b.n[0])
   194  	him, lom = bits.Mul64(a3, b.n[0])
   195  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   196  	d_hi, _ = bits.Add64(d_hi, him, carry)
   197  
   198  	/* [d 0 0 0] = [p3 0 0 0] */
   199  	//c = From64(a4).Mul64(b.n[4])
   200  	c_hi, c_lo = bits.Mul64(a4, b.n[4])
   201  
   202  	/* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
   203  	//d = d.Add(c.And64(M).Mul64(R))
   204  	him, lom = bits.Mul64(c_lo&M, R)
   205  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   206  	d_hi, _ = bits.Add64(d_hi, him, carry)
   207  
   208  	//c = c.Rsh52()
   209  	c_lo = c_lo>>52 | c_hi<<(64-52)
   210  	c_hi = c_hi >> 52
   211  
   212  	/* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
   213  	t3 = d_lo & M
   214  	//d = d.Rsh52()
   215  	d_lo = d_lo>>52 | d_hi<<(64-52)
   216  	d_hi = d_hi >> 52
   217  	/* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
   218  
   219  	//d = d.Add(From64(a0).Mul64(b.n[4]))
   220  	him, lom = bits.Mul64(a0, b.n[4])
   221  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   222  	d_hi, _ = bits.Add64(d_hi, him, carry)
   223  
   224  	//d = d.Add(From64(a1).Mul64(b.n[3]))
   225  	him, lom = bits.Mul64(a1, b.n[3])
   226  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   227  	d_hi, _ = bits.Add64(d_hi, him, carry)
   228  
   229  	//d = d.Add(From64(a2).Mul64(b.n[2]))
   230  	him, lom = bits.Mul64(a2, b.n[2])
   231  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   232  	d_hi, _ = bits.Add64(d_hi, him, carry)
   233  
   234  	//d = d.Add(From64(a3).Mul64(b.n[1]))
   235  	him, lom = bits.Mul64(a3, b.n[1])
   236  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   237  	d_hi, _ = bits.Add64(d_hi, him, carry)
   238  
   239  	//d = d.Add(From64(a4).Mul64(b.n[0]))
   240  	him, lom = bits.Mul64(a4, b.n[0])
   241  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   242  	d_hi, _ = bits.Add64(d_hi, him, carry)
   243  
   244  	//d = d.Add(c.Mul64(R))
   245  	him, lom = bits.Mul64(c_lo, R)
   246  	him += c_hi * R
   247  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   248  	d_hi, _ = bits.Add64(d_hi, him, carry)
   249  
   250  	/* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
   251  	t4 = d_lo & M
   252  
   253  	//d = d.Rsh52()
   254  	d_lo = d_lo>>52 | d_hi<<(64-52)
   255  	d_hi = d_hi >> 52
   256  
   257  	/* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
   258  	tx = (t4 >> 48)
   259  	t4 &= (M >> 4)
   260  	/* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
   261  
   262  	//c = From64(a0).Mul64(b.n[0])
   263  	c_hi, c_lo = bits.Mul64(a0, b.n[0])
   264  
   265  	/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
   266  	//d = d.Add(From64(a1).Mul64(b.n[4]))
   267  	him, lom = bits.Mul64(a1, b.n[4])
   268  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   269  	d_hi, _ = bits.Add64(d_hi, him, carry)
   270  
   271  	//d = d.Add(From64(a2).Mul64(b.n[3]))
   272  	him, lom = bits.Mul64(a2, b.n[3])
   273  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   274  	d_hi, _ = bits.Add64(d_hi, him, carry)
   275  
   276  	//d = d.Add(From64(a3).Mul64(b.n[2]))
   277  	him, lom = bits.Mul64(a3, b.n[2])
   278  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   279  	d_hi, _ = bits.Add64(d_hi, him, carry)
   280  
   281  	//d = d.Add(From64(a4).Mul64(b.n[1]))
   282  	him, lom = bits.Mul64(a4, b.n[1])
   283  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   284  	d_hi, _ = bits.Add64(d_hi, him, carry)
   285  
   286  	/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   287  	u0 = d_lo & M
   288  
   289  	//d = d.Rsh52()
   290  	d_lo = d_lo>>52 | d_hi<<(64-52)
   291  	d_hi = d_hi >> 52
   292  
   293  	/* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   294  	/* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   295  	u0 = (u0 << 4) | tx
   296  	/* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   297  
   298  	//c = c.Add(From64(u0).Mul64(R >> 4))
   299  	him, lom = bits.Mul64(u0, R>>4)
   300  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   301  	c_hi, _ = bits.Add64(c_hi, him, carry)
   302  
   303  	/* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   304  	rn0 = c_lo & M
   305  	//c = c.Rsh52()
   306  	c_lo = c_lo>>52 | c_hi<<(64-52)
   307  	c_hi = c_hi >> 52
   308  
   309  	/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
   310  
   311  	//c = c.Add(From64(a0).Mul64(b.n[1]))
   312  	him, lom = bits.Mul64(a0, b.n[1])
   313  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   314  	c_hi, _ = bits.Add64(c_hi, him, carry)
   315  
   316  	//c = c.Add(From64(a1).Mul64(b.n[0]))
   317  	him, lom = bits.Mul64(a1, b.n[0])
   318  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   319  	c_hi, _ = bits.Add64(c_hi, him, carry)
   320  
   321  	/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
   322  	//d = d.Add(From64(a2).Mul64(b.n[4]))
   323  	him, lom = bits.Mul64(a2, b.n[4])
   324  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   325  	d_hi, _ = bits.Add64(d_hi, him, carry)
   326  
   327  	//d = d.Add(From64(a3).Mul64(b.n[3]))
   328  	him, lom = bits.Mul64(a3, b.n[3])
   329  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   330  	d_hi, _ = bits.Add64(d_hi, him, carry)
   331  
   332  	//d = d.Add(From64(a4).Mul64(b.n[2]))
   333  	him, lom = bits.Mul64(a4, b.n[2])
   334  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   335  	d_hi, _ = bits.Add64(d_hi, him, carry)
   336  
   337  	/* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
   338  	//c = c.Add(From64(d_lo & M).Mul64(R))
   339  	him, lom = bits.Mul64(d_lo&M, R)
   340  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   341  	c_hi, _ = bits.Add64(c_hi, him, carry)
   342  
   343  	//d = d.Rsh52()
   344  	d_lo = d_lo>>52 | d_hi<<(64-52)
   345  	d_hi = d_hi >> 52
   346  
   347  	/* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
   348  	rn1 = c_lo & M
   349  	//c = c.Rsh52()
   350  	c_lo = c_lo>>52 | c_hi<<(64-52)
   351  	c_hi = c_hi >> 52
   352  
   353  	/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
   354  
   355  	//c = c.Add(From64(a0).Mul64(b.n[2]))
   356  	him, lom = bits.Mul64(a0, b.n[2])
   357  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   358  	c_hi, _ = bits.Add64(c_hi, him, carry)
   359  
   360  	//c = c.Add(From64(a1).Mul64(b.n[1]))
   361  	him, lom = bits.Mul64(a1, b.n[1])
   362  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   363  	c_hi, _ = bits.Add64(c_hi, him, carry)
   364  
   365  	//c = c.Add(From64(a2).Mul64(b.n[0]))
   366  	him, lom = bits.Mul64(a2, b.n[0])
   367  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   368  	c_hi, _ = bits.Add64(c_hi, him, carry)
   369  
   370  	/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
   371  	//d = d.Add(From64(a3).Mul64(b.n[4]))
   372  	him, lom = bits.Mul64(a3, b.n[4])
   373  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   374  	d_hi, _ = bits.Add64(d_hi, him, carry)
   375  
   376  	//d = d.Add(From64(a4).Mul64(b.n[3]))
   377  	him, lom = bits.Mul64(a4, b.n[3])
   378  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   379  	d_hi, _ = bits.Add64(d_hi, him, carry)
   380  
   381  	/* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   382  	//c = c.Add(From64(d_lo & M).Mul64(R))
   383  	him, lom = bits.Mul64(d_lo&M, R)
   384  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   385  	c_hi, _ = bits.Add64(c_hi, him, carry)
   386  
   387  	//d = d.Rsh52()
   388  	d_lo = d_lo>>52 | d_hi<<(64-52)
   389  	d_hi = d_hi >> 52
   390  
   391  	/* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   392  
   393  	r.n[0] = rn0
   394  	r.n[1] = rn1
   395  
   396  	/* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   397  	r.n[2] = c_lo & M
   398  	//c = c.Rsh52()
   399  	c_lo = c_lo>>52 | c_hi<<(64-52)
   400  	c_hi = c_hi >> 52
   401  	/* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   402  
   403  	//c = c.Add(d.Mul64(R).Add64(t3))
   404  	him, lom = bits.Mul64(d_lo, R)
   405  	him += d_hi * R
   406  	lom, carry = bits.Add64(lom, t3, 0)
   407  	him += carry
   408  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   409  	c_hi, _ = bits.Add64(c_hi, him, carry)
   410  
   411  	/* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   412  	r.n[3] = c_lo & M
   413  	//c = c.Rsh52()
   414  
   415  	r.n[4] = (c_lo>>52 | c_hi<<(64-52)) + t4
   416  }
   417  
   418  func (a *Field) Sqr(r *Field) {
   419  	var c_lo, c_hi, d_lo, d_hi uint64
   420  	var carry, him, lom uint64
   421  
   422  	a0 := a.n[0]
   423  	a1 := a.n[1]
   424  	a2 := a.n[2]
   425  	a3 := a.n[3]
   426  	a4 := a.n[4]
   427  	var t3, t4, tx, u0 uint64
   428  	const (
   429  		M = 0xFFFFFFFFFFFFF
   430  		R = 0x1000003D10
   431  	)
   432  
   433  	/**  [... a b c] is a shorthand for ... + a<<104 + b<<52 + c<<0 mod n.
   434  	 *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
   435  	 *  Note that [x 0 0 0 0 0] = [x*R].
   436  	 */
   437  
   438  	//d = From64(a0 * 2).Mul64(a3)
   439  	d_hi, d_lo = bits.Mul64(a0*2, a3)
   440  
   441  	//d = d.Add(From64(a1 * 2).Mul64(a2))
   442  	him, lom = bits.Mul64(a1*2, a2)
   443  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   444  	d_hi, _ = bits.Add64(d_hi, him, carry)
   445  
   446  	/* [d 0 0 0] = [p3 0 0 0] */
   447  	//c = From64(a4).Mul64(a4)
   448  	him, lom = bits.Mul64(a4, a4)
   449  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   450  	c_hi, _ = bits.Add64(c_hi, him, carry)
   451  
   452  	/* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
   453  	//d = d.Add(c.And64(M).Mul64(R))
   454  	him, lom = bits.Mul64(c_lo&M, R)
   455  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   456  	d_hi, _ = bits.Add64(d_hi, him, carry)
   457  
   458  	//c = c.Rsh52()
   459  	c_lo = c_lo>>52 | c_hi<<(64-52)
   460  	c_hi = c_hi >> 52
   461  
   462  	/* [c 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
   463  	t3 = d_lo & M
   464  
   465  	//d = d.Rsh52()
   466  	d_lo = d_lo>>52 | d_hi<<(64-52)
   467  	d_hi = d_hi >> 52
   468  
   469  	/* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */
   470  
   471  	a4 *= 2
   472  	//d = d.Add(From64(a0).Mul64(a4))
   473  	him, lom = bits.Mul64(a0, a4)
   474  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   475  	d_hi, _ = bits.Add64(d_hi, him, carry)
   476  
   477  	//d = d.Add(From64(a1 * 2).Mul64(a3))
   478  	him, lom = bits.Mul64(a1*2, a3)
   479  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   480  	d_hi, _ = bits.Add64(d_hi, him, carry)
   481  
   482  	//d = d.Add(From64(a2).Mul64(a2))
   483  	him, lom = bits.Mul64(a2, a2)
   484  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   485  	d_hi, _ = bits.Add64(d_hi, him, carry)
   486  
   487  	/* [c 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
   488  
   489  	//d = d.Add(c.Mul64(R))
   490  	him, lom = bits.Mul64(c_lo, R)
   491  	him += c_hi * R
   492  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   493  	d_hi, _ = bits.Add64(d_hi, him, carry)
   494  
   495  	/* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
   496  	t4 = d_lo & M
   497  
   498  	//d = d.Rsh52()
   499  	d_lo = d_lo>>52 | d_hi<<(64-52)
   500  	d_hi = d_hi >> 52
   501  
   502  	/* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
   503  	tx = (t4 >> 48)
   504  	t4 &= (M >> 4)
   505  	/* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */
   506  
   507  	//c = From64(a0).Mul64(a0)
   508  	c_hi, c_lo = bits.Mul64(a0, a0)
   509  
   510  	/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */
   511  	//d = d.Add(From64(a1).Mul64(a4))
   512  	him, lom = bits.Mul64(a1, a4)
   513  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   514  	d_hi, _ = bits.Add64(d_hi, him, carry)
   515  
   516  	//d = d.Add(From64(a2 * 2).Mul64(a3))
   517  	him, lom = bits.Mul64(a2*2, a3)
   518  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   519  	d_hi, _ = bits.Add64(d_hi, him, carry)
   520  
   521  	/* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   522  	u0 = d_lo & M
   523  
   524  	//d = d.Rsh52()
   525  	d_lo = d_lo>>52 | d_hi<<(64-52)
   526  	d_hi = d_hi >> 52
   527  
   528  	/* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   529  	/* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   530  	u0 = (u0 << 4) | tx
   531  	/* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   532  
   533  	//c = c.Add(From64(u0).Mul64(R >> 4))
   534  	him, lom = bits.Mul64(u0, R>>4)
   535  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   536  	c_hi, _ = bits.Add64(c_hi, him, carry)
   537  
   538  	/* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */
   539  	r.n[0] = c_lo & M
   540  
   541  	//c = c.Rsh52()
   542  	c_lo = c_lo>>52 | c_hi<<(64-52)
   543  	c_hi = c_hi >> 52
   544  
   545  	/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */
   546  
   547  	a0 *= 2
   548  	//c = c.Add(From64(a0).Mul64(a1))
   549  	him, lom = bits.Mul64(a0, a1)
   550  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   551  	c_hi, _ = bits.Add64(c_hi, him, carry)
   552  
   553  	/* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */
   554  	//d = d.Add(From64(a2).Mul64(a4))
   555  	him, lom = bits.Mul64(a2, a4)
   556  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   557  	d_hi, _ = bits.Add64(d_hi, him, carry)
   558  
   559  	//d = d.Add(From64(a3).Mul64(a3))
   560  	him, lom = bits.Mul64(a3, a3)
   561  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   562  	d_hi, _ = bits.Add64(d_hi, him, carry)
   563  
   564  	/* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
   565  	//c = c.Add(From64(d_lo & M).Mul64(R))
   566  	him, lom = bits.Mul64(d_lo&M, R)
   567  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   568  	c_hi, _ = bits.Add64(c_hi, him, carry)
   569  
   570  	//d = d.Rsh52()
   571  	d_lo = d_lo>>52 | d_hi<<(64-52)
   572  	d_hi = d_hi >> 52
   573  
   574  	/* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
   575  	r.n[1] = c_lo & M
   576  
   577  	//c = c.Rsh52()
   578  	c_lo = c_lo>>52 | c_hi<<(64-52)
   579  	c_hi = c_hi >> 52
   580  	/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */
   581  
   582  	//c = c.Add(From64(a0).Mul64(a2))
   583  	him, lom = bits.Mul64(a0, a2)
   584  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   585  	c_hi, _ = bits.Add64(c_hi, him, carry)
   586  
   587  	//c = c.Add(From64(a1).Mul64(a1))
   588  	him, lom = bits.Mul64(a1, a1)
   589  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   590  	c_hi, _ = bits.Add64(c_hi, him, carry)
   591  
   592  	/* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */
   593  	//d = d.Add(From64(a3).Mul64(a4))
   594  	him, lom = bits.Mul64(a3, a4)
   595  	d_lo, carry = bits.Add64(d_lo, lom, 0)
   596  	d_hi, _ = bits.Add64(d_hi, him, carry)
   597  
   598  	/* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   599  	//c = c.Add(From64(d_lo & M).Mul64(R))
   600  	him, lom = bits.Mul64(d_lo&M, R)
   601  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   602  	c_hi, _ = bits.Add64(c_hi, him, carry)
   603  
   604  	//d = d.Rsh52()
   605  	d_lo = d_lo>>52 | d_hi<<(64-52)
   606  	d_hi = d_hi >> 52
   607  
   608  	/* [d 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   609  	r.n[2] = c_lo & M
   610  
   611  	//c = c.Rsh52()
   612  	c_lo = c_lo>>52 | c_hi<<(64-52)
   613  	c_hi = c_hi >> 52
   614  
   615  	/* [d 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   616  
   617  	//c = c.Add(d.Mul64(R).Add64(t3))
   618  	him, lom = bits.Mul64(d_lo, R)
   619  	him += d_hi * R
   620  	lom, carry = bits.Add64(lom, t3, 0)
   621  	him += carry
   622  	c_lo, carry = bits.Add64(c_lo, lom, 0)
   623  	c_hi, _ = bits.Add64(c_hi, him, carry)
   624  
   625  	/* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   626  	r.n[3] = c_lo & M
   627  
   628  	r.n[4] = (c_lo>>52 | c_hi<<(64-52)) + t4
   629  }