github.com/zmap/zcrypto@v0.0.0-20240512203510-0fef58d9a9db/tls/poly1305.go (about)

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tls
     6  
     7  // Based on original, public domain implementation from NaCl by D. J.
     8  // Bernstein.
     9  
    10  import (
    11  	"crypto/subtle"
    12  	"math"
    13  )
    14  
// Floating-point constants used by poly1305Sum.
//
// Every alpha constant equals 1.5 * 2^(52+N), where N is the number in its
// name (an "m" prefix means minus, e.g. alpham80 is N = -80). With
// round-to-nearest float64 arithmetic, adding and then subtracting such a
// constant rounds a (sufficiently small) value to a multiple of 2^N, which is
// how the code splits numbers into limbs and extracts carries.
const (
	alpham80 = 0.00000000558793544769287109375
	alpham48 = 24.0
	alpham16 = 103079215104.0
	alpha0   = 6755399441055744.0
	alpha18  = 1770887431076116955136.0
	alpha32  = 29014219670751100192948224.0
	alpha50  = 7605903601369376408980219232256.0
	alpha64  = 124615124604835863084731911901282304.0
	alpha82  = 32667107224410092492483962313449748299776.0
	alpha96  = 535217884764734955396857238543560676143529984.0
	alpha112 = 35076039295941670036888435985190792471742381031424.0
	alpha130 = 9194973245195333150150082162901855101712434733101613056.0
	// scale is 5 * 2^-130; multiplying by it folds a value that sits at or
	// above 2^130 back into the low limbs (2^130 is congruent to 5 modulo
	// 2^130 - 5).
	scale = 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125
	// offset0..offset3 are alpha0, alpha32, alpha64 and alpha96 plus the
	// biases 2^33-5, 2^65-2^33, 2^97-2^65 and 2^130-2^97 respectively; the
	// biases sum to 2^130 - 5. They are added just before the accumulator is
	// reinterpreted as integer bit patterns at the end of poly1305Sum.
	offset0 = 6755408030990331.0
	offset1 = 29014256564239239022116864.0
	offset2 = 124615283061160854719918951570079744.0
	offset3 = 535219245894202480694386063513315216128475136.0
)
    34  
    35  // poly1305Verify returns true if mac is a valid authenticator for m with the
    36  // given key.
    37  func poly1305Verify(mac *[16]byte, m []byte, key *[32]byte) bool {
    38  	var tmp [16]byte
    39  	poly1305Sum(&tmp, m, key)
    40  	return subtle.ConstantTimeCompare(tmp[:], mac[:]) == 1
    41  }
    42  
    43  // poly1305Sum generates an authenticator for m using a one-time key and puts
    44  // the 16-byte result into out. Authenticating two different messages with the
    45  // same key allows an attacker to forge messages at will.
    46  func poly1305Sum(out *[16]byte, m []byte, key *[32]byte) {
    47  	r := key
    48  	s := key[16:]
    49  	var (
    50  		y7        float64
    51  		y6        float64
    52  		y1        float64
    53  		y0        float64
    54  		y5        float64
    55  		y4        float64
    56  		x7        float64
    57  		x6        float64
    58  		x1        float64
    59  		x0        float64
    60  		y3        float64
    61  		y2        float64
    62  		x5        float64
    63  		r3lowx0   float64
    64  		x4        float64
    65  		r0lowx6   float64
    66  		x3        float64
    67  		r3highx0  float64
    68  		x2        float64
    69  		r0highx6  float64
    70  		r0lowx0   float64
    71  		sr1lowx6  float64
    72  		r0highx0  float64
    73  		sr1highx6 float64
    74  		sr3low    float64
    75  		r1lowx0   float64
    76  		sr2lowx6  float64
    77  		r1highx0  float64
    78  		sr2highx6 float64
    79  		r2lowx0   float64
    80  		sr3lowx6  float64
    81  		r2highx0  float64
    82  		sr3highx6 float64
    83  		r1highx4  float64
    84  		r1lowx4   float64
    85  		r0highx4  float64
    86  		r0lowx4   float64
    87  		sr3highx4 float64
    88  		sr3lowx4  float64
    89  		sr2highx4 float64
    90  		sr2lowx4  float64
    91  		r0lowx2   float64
    92  		r0highx2  float64
    93  		r1lowx2   float64
    94  		r1highx2  float64
    95  		r2lowx2   float64
    96  		r2highx2  float64
    97  		sr3lowx2  float64
    98  		sr3highx2 float64
    99  		z0        float64
   100  		z1        float64
   101  		z2        float64
   102  		z3        float64
   103  		m0        int64
   104  		m1        int64
   105  		m2        int64
   106  		m3        int64
   107  		m00       uint32
   108  		m01       uint32
   109  		m02       uint32
   110  		m03       uint32
   111  		m10       uint32
   112  		m11       uint32
   113  		m12       uint32
   114  		m13       uint32
   115  		m20       uint32
   116  		m21       uint32
   117  		m22       uint32
   118  		m23       uint32
   119  		m30       uint32
   120  		m31       uint32
   121  		m32       uint32
   122  		m33       uint64
   123  		lbelow2   int32
   124  		lbelow3   int32
   125  		lbelow4   int32
   126  		lbelow5   int32
   127  		lbelow6   int32
   128  		lbelow7   int32
   129  		lbelow8   int32
   130  		lbelow9   int32
   131  		lbelow10  int32
   132  		lbelow11  int32
   133  		lbelow12  int32
   134  		lbelow13  int32
   135  		lbelow14  int32
   136  		lbelow15  int32
   137  		s00       uint32
   138  		s01       uint32
   139  		s02       uint32
   140  		s03       uint32
   141  		s10       uint32
   142  		s11       uint32
   143  		s12       uint32
   144  		s13       uint32
   145  		s20       uint32
   146  		s21       uint32
   147  		s22       uint32
   148  		s23       uint32
   149  		s30       uint32
   150  		s31       uint32
   151  		s32       uint32
   152  		s33       uint32
   153  		bits32    uint64
   154  		f         uint64
   155  		f0        uint64
   156  		f1        uint64
   157  		f2        uint64
   158  		f3        uint64
   159  		f4        uint64
   160  		g         uint64
   161  		g0        uint64
   162  		g1        uint64
   163  		g2        uint64
   164  		g3        uint64
   165  		g4        uint64
   166  	)
   167  
   168  	var p int32
   169  
   170  	l := int32(len(m))
   171  
   172  	r00 := uint32(r[0])
   173  
   174  	r01 := uint32(r[1])
   175  
   176  	r02 := uint32(r[2])
   177  	r0 := int64(2151)
   178  
   179  	r03 := uint32(r[3])
   180  	r03 &= 15
   181  	r0 <<= 51
   182  
   183  	r10 := uint32(r[4])
   184  	r10 &= 252
   185  	r01 <<= 8
   186  	r0 += int64(r00)
   187  
   188  	r11 := uint32(r[5])
   189  	r02 <<= 16
   190  	r0 += int64(r01)
   191  
   192  	r12 := uint32(r[6])
   193  	r03 <<= 24
   194  	r0 += int64(r02)
   195  
   196  	r13 := uint32(r[7])
   197  	r13 &= 15
   198  	r1 := int64(2215)
   199  	r0 += int64(r03)
   200  
   201  	d0 := r0
   202  	r1 <<= 51
   203  	r2 := int64(2279)
   204  
   205  	r20 := uint32(r[8])
   206  	r20 &= 252
   207  	r11 <<= 8
   208  	r1 += int64(r10)
   209  
   210  	r21 := uint32(r[9])
   211  	r12 <<= 16
   212  	r1 += int64(r11)
   213  
   214  	r22 := uint32(r[10])
   215  	r13 <<= 24
   216  	r1 += int64(r12)
   217  
   218  	r23 := uint32(r[11])
   219  	r23 &= 15
   220  	r2 <<= 51
   221  	r1 += int64(r13)
   222  
   223  	d1 := r1
   224  	r21 <<= 8
   225  	r2 += int64(r20)
   226  
   227  	r30 := uint32(r[12])
   228  	r30 &= 252
   229  	r22 <<= 16
   230  	r2 += int64(r21)
   231  
   232  	r31 := uint32(r[13])
   233  	r23 <<= 24
   234  	r2 += int64(r22)
   235  
   236  	r32 := uint32(r[14])
   237  	r2 += int64(r23)
   238  	r3 := int64(2343)
   239  
   240  	d2 := r2
   241  	r3 <<= 51
   242  
   243  	r33 := uint32(r[15])
   244  	r33 &= 15
   245  	r31 <<= 8
   246  	r3 += int64(r30)
   247  
   248  	r32 <<= 16
   249  	r3 += int64(r31)
   250  
   251  	r33 <<= 24
   252  	r3 += int64(r32)
   253  
   254  	r3 += int64(r33)
   255  	h0 := alpha32 - alpha32
   256  
   257  	d3 := r3
   258  	h1 := alpha32 - alpha32
   259  
   260  	h2 := alpha32 - alpha32
   261  
   262  	h3 := alpha32 - alpha32
   263  
   264  	h4 := alpha32 - alpha32
   265  
   266  	r0low := math.Float64frombits(uint64(d0))
   267  	h5 := alpha32 - alpha32
   268  
   269  	r1low := math.Float64frombits(uint64(d1))
   270  	h6 := alpha32 - alpha32
   271  
   272  	r2low := math.Float64frombits(uint64(d2))
   273  	h7 := alpha32 - alpha32
   274  
   275  	r0low -= alpha0
   276  
   277  	r1low -= alpha32
   278  
   279  	r2low -= alpha64
   280  
   281  	r0high := r0low + alpha18
   282  
   283  	r3low := math.Float64frombits(uint64(d3))
   284  
   285  	r1high := r1low + alpha50
   286  	sr1low := scale * r1low
   287  
   288  	r2high := r2low + alpha82
   289  	sr2low := scale * r2low
   290  
   291  	r0high -= alpha18
   292  	r0high_stack := r0high
   293  
   294  	r3low -= alpha96
   295  
   296  	r1high -= alpha50
   297  	r1high_stack := r1high
   298  
   299  	sr1high := sr1low + alpham80
   300  
   301  	r0low -= r0high
   302  
   303  	r2high -= alpha82
   304  	sr3low = scale * r3low
   305  
   306  	sr2high := sr2low + alpham48
   307  
   308  	r1low -= r1high
   309  	r1low_stack := r1low
   310  
   311  	sr1high -= alpham80
   312  	sr1high_stack := sr1high
   313  
   314  	r2low -= r2high
   315  	r2low_stack := r2low
   316  
   317  	sr2high -= alpham48
   318  	sr2high_stack := sr2high
   319  
   320  	r3high := r3low + alpha112
   321  	r0low_stack := r0low
   322  
   323  	sr1low -= sr1high
   324  	sr1low_stack := sr1low
   325  
   326  	sr3high := sr3low + alpham16
   327  	r2high_stack := r2high
   328  
   329  	sr2low -= sr2high
   330  	sr2low_stack := sr2low
   331  
   332  	r3high -= alpha112
   333  	r3high_stack := r3high
   334  
   335  	sr3high -= alpham16
   336  	sr3high_stack := sr3high
   337  
   338  	r3low -= r3high
   339  	r3low_stack := r3low
   340  
   341  	sr3low -= sr3high
   342  	sr3low_stack := sr3low
   343  
   344  	if l < 16 {
   345  		goto addatmost15bytes
   346  	}
   347  
   348  	m00 = uint32(m[p+0])
   349  	m0 = 2151
   350  
   351  	m0 <<= 51
   352  	m1 = 2215
   353  	m01 = uint32(m[p+1])
   354  
   355  	m1 <<= 51
   356  	m2 = 2279
   357  	m02 = uint32(m[p+2])
   358  
   359  	m2 <<= 51
   360  	m3 = 2343
   361  	m03 = uint32(m[p+3])
   362  
   363  	m10 = uint32(m[p+4])
   364  	m01 <<= 8
   365  	m0 += int64(m00)
   366  
   367  	m11 = uint32(m[p+5])
   368  	m02 <<= 16
   369  	m0 += int64(m01)
   370  
   371  	m12 = uint32(m[p+6])
   372  	m03 <<= 24
   373  	m0 += int64(m02)
   374  
   375  	m13 = uint32(m[p+7])
   376  	m3 <<= 51
   377  	m0 += int64(m03)
   378  
   379  	m20 = uint32(m[p+8])
   380  	m11 <<= 8
   381  	m1 += int64(m10)
   382  
   383  	m21 = uint32(m[p+9])
   384  	m12 <<= 16
   385  	m1 += int64(m11)
   386  
   387  	m22 = uint32(m[p+10])
   388  	m13 <<= 24
   389  	m1 += int64(m12)
   390  
   391  	m23 = uint32(m[p+11])
   392  	m1 += int64(m13)
   393  
   394  	m30 = uint32(m[p+12])
   395  	m21 <<= 8
   396  	m2 += int64(m20)
   397  
   398  	m31 = uint32(m[p+13])
   399  	m22 <<= 16
   400  	m2 += int64(m21)
   401  
   402  	m32 = uint32(m[p+14])
   403  	m23 <<= 24
   404  	m2 += int64(m22)
   405  
   406  	m33 = uint64(m[p+15])
   407  	m2 += int64(m23)
   408  
   409  	d0 = m0
   410  	m31 <<= 8
   411  	m3 += int64(m30)
   412  
   413  	d1 = m1
   414  	m32 <<= 16
   415  	m3 += int64(m31)
   416  
   417  	d2 = m2
   418  	m33 += 256
   419  
   420  	m33 <<= 24
   421  	m3 += int64(m32)
   422  
   423  	m3 += int64(m33)
   424  	d3 = m3
   425  
   426  	p += 16
   427  	l -= 16
   428  
   429  	z0 = math.Float64frombits(uint64(d0))
   430  
   431  	z1 = math.Float64frombits(uint64(d1))
   432  
   433  	z2 = math.Float64frombits(uint64(d2))
   434  
   435  	z3 = math.Float64frombits(uint64(d3))
   436  
   437  	z0 -= alpha0
   438  
   439  	z1 -= alpha32
   440  
   441  	z2 -= alpha64
   442  
   443  	z3 -= alpha96
   444  
   445  	h0 += z0
   446  
   447  	h1 += z1
   448  
   449  	h3 += z2
   450  
   451  	h5 += z3
   452  
   453  	if l < 16 {
   454  		goto multiplyaddatmost15bytes
   455  	}
   456  
   457  multiplyaddatleast16bytes:
   458  
   459  	m2 = 2279
   460  	m20 = uint32(m[p+8])
   461  	y7 = h7 + alpha130
   462  
   463  	m2 <<= 51
   464  	m3 = 2343
   465  	m21 = uint32(m[p+9])
   466  	y6 = h6 + alpha130
   467  
   468  	m3 <<= 51
   469  	m0 = 2151
   470  	m22 = uint32(m[p+10])
   471  	y1 = h1 + alpha32
   472  
   473  	m0 <<= 51
   474  	m1 = 2215
   475  	m23 = uint32(m[p+11])
   476  	y0 = h0 + alpha32
   477  
   478  	m1 <<= 51
   479  	m30 = uint32(m[p+12])
   480  	y7 -= alpha130
   481  
   482  	m21 <<= 8
   483  	m2 += int64(m20)
   484  	m31 = uint32(m[p+13])
   485  	y6 -= alpha130
   486  
   487  	m22 <<= 16
   488  	m2 += int64(m21)
   489  	m32 = uint32(m[p+14])
   490  	y1 -= alpha32
   491  
   492  	m23 <<= 24
   493  	m2 += int64(m22)
   494  	m33 = uint64(m[p+15])
   495  	y0 -= alpha32
   496  
   497  	m2 += int64(m23)
   498  	m00 = uint32(m[p+0])
   499  	y5 = h5 + alpha96
   500  
   501  	m31 <<= 8
   502  	m3 += int64(m30)
   503  	m01 = uint32(m[p+1])
   504  	y4 = h4 + alpha96
   505  
   506  	m32 <<= 16
   507  	m02 = uint32(m[p+2])
   508  	x7 = h7 - y7
   509  	y7 *= scale
   510  
   511  	m33 += 256
   512  	m03 = uint32(m[p+3])
   513  	x6 = h6 - y6
   514  	y6 *= scale
   515  
   516  	m33 <<= 24
   517  	m3 += int64(m31)
   518  	m10 = uint32(m[p+4])
   519  	x1 = h1 - y1
   520  
   521  	m01 <<= 8
   522  	m3 += int64(m32)
   523  	m11 = uint32(m[p+5])
   524  	x0 = h0 - y0
   525  
   526  	m3 += int64(m33)
   527  	m0 += int64(m00)
   528  	m12 = uint32(m[p+6])
   529  	y5 -= alpha96
   530  
   531  	m02 <<= 16
   532  	m0 += int64(m01)
   533  	m13 = uint32(m[p+7])
   534  	y4 -= alpha96
   535  
   536  	m03 <<= 24
   537  	m0 += int64(m02)
   538  	d2 = m2
   539  	x1 += y7
   540  
   541  	m0 += int64(m03)
   542  	d3 = m3
   543  	x0 += y6
   544  
   545  	m11 <<= 8
   546  	m1 += int64(m10)
   547  	d0 = m0
   548  	x7 += y5
   549  
   550  	m12 <<= 16
   551  	m1 += int64(m11)
   552  	x6 += y4
   553  
   554  	m13 <<= 24
   555  	m1 += int64(m12)
   556  	y3 = h3 + alpha64
   557  
   558  	m1 += int64(m13)
   559  	d1 = m1
   560  	y2 = h2 + alpha64
   561  
   562  	x0 += x1
   563  
   564  	x6 += x7
   565  
   566  	y3 -= alpha64
   567  	r3low = r3low_stack
   568  
   569  	y2 -= alpha64
   570  	r0low = r0low_stack
   571  
   572  	x5 = h5 - y5
   573  	r3lowx0 = r3low * x0
   574  	r3high = r3high_stack
   575  
   576  	x4 = h4 - y4
   577  	r0lowx6 = r0low * x6
   578  	r0high = r0high_stack
   579  
   580  	x3 = h3 - y3
   581  	r3highx0 = r3high * x0
   582  	sr1low = sr1low_stack
   583  
   584  	x2 = h2 - y2
   585  	r0highx6 = r0high * x6
   586  	sr1high = sr1high_stack
   587  
   588  	x5 += y3
   589  	r0lowx0 = r0low * x0
   590  	r1low = r1low_stack
   591  
   592  	h6 = r3lowx0 + r0lowx6
   593  	sr1lowx6 = sr1low * x6
   594  	r1high = r1high_stack
   595  
   596  	x4 += y2
   597  	r0highx0 = r0high * x0
   598  	sr2low = sr2low_stack
   599  
   600  	h7 = r3highx0 + r0highx6
   601  	sr1highx6 = sr1high * x6
   602  	sr2high = sr2high_stack
   603  
   604  	x3 += y1
   605  	r1lowx0 = r1low * x0
   606  	r2low = r2low_stack
   607  
   608  	h0 = r0lowx0 + sr1lowx6
   609  	sr2lowx6 = sr2low * x6
   610  	r2high = r2high_stack
   611  
   612  	x2 += y0
   613  	r1highx0 = r1high * x0
   614  	sr3low = sr3low_stack
   615  
   616  	h1 = r0highx0 + sr1highx6
   617  	sr2highx6 = sr2high * x6
   618  	sr3high = sr3high_stack
   619  
   620  	x4 += x5
   621  	r2lowx0 = r2low * x0
   622  	z2 = math.Float64frombits(uint64(d2))
   623  
   624  	h2 = r1lowx0 + sr2lowx6
   625  	sr3lowx6 = sr3low * x6
   626  
   627  	x2 += x3
   628  	r2highx0 = r2high * x0
   629  	z3 = math.Float64frombits(uint64(d3))
   630  
   631  	h3 = r1highx0 + sr2highx6
   632  	sr3highx6 = sr3high * x6
   633  
   634  	r1highx4 = r1high * x4
   635  	z2 -= alpha64
   636  
   637  	h4 = r2lowx0 + sr3lowx6
   638  	r1lowx4 = r1low * x4
   639  
   640  	r0highx4 = r0high * x4
   641  	z3 -= alpha96
   642  
   643  	h5 = r2highx0 + sr3highx6
   644  	r0lowx4 = r0low * x4
   645  
   646  	h7 += r1highx4
   647  	sr3highx4 = sr3high * x4
   648  
   649  	h6 += r1lowx4
   650  	sr3lowx4 = sr3low * x4
   651  
   652  	h5 += r0highx4
   653  	sr2highx4 = sr2high * x4
   654  
   655  	h4 += r0lowx4
   656  	sr2lowx4 = sr2low * x4
   657  
   658  	h3 += sr3highx4
   659  	r0lowx2 = r0low * x2
   660  
   661  	h2 += sr3lowx4
   662  	r0highx2 = r0high * x2
   663  
   664  	h1 += sr2highx4
   665  	r1lowx2 = r1low * x2
   666  
   667  	h0 += sr2lowx4
   668  	r1highx2 = r1high * x2
   669  
   670  	h2 += r0lowx2
   671  	r2lowx2 = r2low * x2
   672  
   673  	h3 += r0highx2
   674  	r2highx2 = r2high * x2
   675  
   676  	h4 += r1lowx2
   677  	sr3lowx2 = sr3low * x2
   678  
   679  	h5 += r1highx2
   680  	sr3highx2 = sr3high * x2
   681  
   682  	p += 16
   683  	l -= 16
   684  	h6 += r2lowx2
   685  
   686  	h7 += r2highx2
   687  
   688  	z1 = math.Float64frombits(uint64(d1))
   689  	h0 += sr3lowx2
   690  
   691  	z0 = math.Float64frombits(uint64(d0))
   692  	h1 += sr3highx2
   693  
   694  	z1 -= alpha32
   695  
   696  	z0 -= alpha0
   697  
   698  	h5 += z3
   699  
   700  	h3 += z2
   701  
   702  	h1 += z1
   703  
   704  	h0 += z0
   705  
   706  	if l >= 16 {
   707  		goto multiplyaddatleast16bytes
   708  	}
   709  
   710  multiplyaddatmost15bytes:
   711  
   712  	y7 = h7 + alpha130
   713  
   714  	y6 = h6 + alpha130
   715  
   716  	y1 = h1 + alpha32
   717  
   718  	y0 = h0 + alpha32
   719  
   720  	y7 -= alpha130
   721  
   722  	y6 -= alpha130
   723  
   724  	y1 -= alpha32
   725  
   726  	y0 -= alpha32
   727  
   728  	y5 = h5 + alpha96
   729  
   730  	y4 = h4 + alpha96
   731  
   732  	x7 = h7 - y7
   733  	y7 *= scale
   734  
   735  	x6 = h6 - y6
   736  	y6 *= scale
   737  
   738  	x1 = h1 - y1
   739  
   740  	x0 = h0 - y0
   741  
   742  	y5 -= alpha96
   743  
   744  	y4 -= alpha96
   745  
   746  	x1 += y7
   747  
   748  	x0 += y6
   749  
   750  	x7 += y5
   751  
   752  	x6 += y4
   753  
   754  	y3 = h3 + alpha64
   755  
   756  	y2 = h2 + alpha64
   757  
   758  	x0 += x1
   759  
   760  	x6 += x7
   761  
   762  	y3 -= alpha64
   763  	r3low = r3low_stack
   764  
   765  	y2 -= alpha64
   766  	r0low = r0low_stack
   767  
   768  	x5 = h5 - y5
   769  	r3lowx0 = r3low * x0
   770  	r3high = r3high_stack
   771  
   772  	x4 = h4 - y4
   773  	r0lowx6 = r0low * x6
   774  	r0high = r0high_stack
   775  
   776  	x3 = h3 - y3
   777  	r3highx0 = r3high * x0
   778  	sr1low = sr1low_stack
   779  
   780  	x2 = h2 - y2
   781  	r0highx6 = r0high * x6
   782  	sr1high = sr1high_stack
   783  
   784  	x5 += y3
   785  	r0lowx0 = r0low * x0
   786  	r1low = r1low_stack
   787  
   788  	h6 = r3lowx0 + r0lowx6
   789  	sr1lowx6 = sr1low * x6
   790  	r1high = r1high_stack
   791  
   792  	x4 += y2
   793  	r0highx0 = r0high * x0
   794  	sr2low = sr2low_stack
   795  
   796  	h7 = r3highx0 + r0highx6
   797  	sr1highx6 = sr1high * x6
   798  	sr2high = sr2high_stack
   799  
   800  	x3 += y1
   801  	r1lowx0 = r1low * x0
   802  	r2low = r2low_stack
   803  
   804  	h0 = r0lowx0 + sr1lowx6
   805  	sr2lowx6 = sr2low * x6
   806  	r2high = r2high_stack
   807  
   808  	x2 += y0
   809  	r1highx0 = r1high * x0
   810  	sr3low = sr3low_stack
   811  
   812  	h1 = r0highx0 + sr1highx6
   813  	sr2highx6 = sr2high * x6
   814  	sr3high = sr3high_stack
   815  
   816  	x4 += x5
   817  	r2lowx0 = r2low * x0
   818  
   819  	h2 = r1lowx0 + sr2lowx6
   820  	sr3lowx6 = sr3low * x6
   821  
   822  	x2 += x3
   823  	r2highx0 = r2high * x0
   824  
   825  	h3 = r1highx0 + sr2highx6
   826  	sr3highx6 = sr3high * x6
   827  
   828  	r1highx4 = r1high * x4
   829  
   830  	h4 = r2lowx0 + sr3lowx6
   831  	r1lowx4 = r1low * x4
   832  
   833  	r0highx4 = r0high * x4
   834  
   835  	h5 = r2highx0 + sr3highx6
   836  	r0lowx4 = r0low * x4
   837  
   838  	h7 += r1highx4
   839  	sr3highx4 = sr3high * x4
   840  
   841  	h6 += r1lowx4
   842  	sr3lowx4 = sr3low * x4
   843  
   844  	h5 += r0highx4
   845  	sr2highx4 = sr2high * x4
   846  
   847  	h4 += r0lowx4
   848  	sr2lowx4 = sr2low * x4
   849  
   850  	h3 += sr3highx4
   851  	r0lowx2 = r0low * x2
   852  
   853  	h2 += sr3lowx4
   854  	r0highx2 = r0high * x2
   855  
   856  	h1 += sr2highx4
   857  	r1lowx2 = r1low * x2
   858  
   859  	h0 += sr2lowx4
   860  	r1highx2 = r1high * x2
   861  
   862  	h2 += r0lowx2
   863  	r2lowx2 = r2low * x2
   864  
   865  	h3 += r0highx2
   866  	r2highx2 = r2high * x2
   867  
   868  	h4 += r1lowx2
   869  	sr3lowx2 = sr3low * x2
   870  
   871  	h5 += r1highx2
   872  	sr3highx2 = sr3high * x2
   873  
   874  	h6 += r2lowx2
   875  
   876  	h7 += r2highx2
   877  
   878  	h0 += sr3lowx2
   879  
   880  	h1 += sr3highx2
   881  
   882  addatmost15bytes:
   883  
   884  	if l == 0 {
   885  		goto nomorebytes
   886  	}
   887  
   888  	lbelow2 = l - 2
   889  
   890  	lbelow3 = l - 3
   891  
   892  	lbelow2 >>= 31
   893  	lbelow4 = l - 4
   894  
   895  	m00 = uint32(m[p+0])
   896  	lbelow3 >>= 31
   897  	p += lbelow2
   898  
   899  	m01 = uint32(m[p+1])
   900  	lbelow4 >>= 31
   901  	p += lbelow3
   902  
   903  	m02 = uint32(m[p+2])
   904  	p += lbelow4
   905  	m0 = 2151
   906  
   907  	m03 = uint32(m[p+3])
   908  	m0 <<= 51
   909  	m1 = 2215
   910  
   911  	m0 += int64(m00)
   912  	m01 &^= uint32(lbelow2)
   913  
   914  	m02 &^= uint32(lbelow3)
   915  	m01 -= uint32(lbelow2)
   916  
   917  	m01 <<= 8
   918  	m03 &^= uint32(lbelow4)
   919  
   920  	m0 += int64(m01)
   921  	lbelow2 -= lbelow3
   922  
   923  	m02 += uint32(lbelow2)
   924  	lbelow3 -= lbelow4
   925  
   926  	m02 <<= 16
   927  	m03 += uint32(lbelow3)
   928  
   929  	m03 <<= 24
   930  	m0 += int64(m02)
   931  
   932  	m0 += int64(m03)
   933  	lbelow5 = l - 5
   934  
   935  	lbelow6 = l - 6
   936  	lbelow7 = l - 7
   937  
   938  	lbelow5 >>= 31
   939  	lbelow8 = l - 8
   940  
   941  	lbelow6 >>= 31
   942  	p += lbelow5
   943  
   944  	m10 = uint32(m[p+4])
   945  	lbelow7 >>= 31
   946  	p += lbelow6
   947  
   948  	m11 = uint32(m[p+5])
   949  	lbelow8 >>= 31
   950  	p += lbelow7
   951  
   952  	m12 = uint32(m[p+6])
   953  	m1 <<= 51
   954  	p += lbelow8
   955  
   956  	m13 = uint32(m[p+7])
   957  	m10 &^= uint32(lbelow5)
   958  	lbelow4 -= lbelow5
   959  
   960  	m10 += uint32(lbelow4)
   961  	lbelow5 -= lbelow6
   962  
   963  	m11 &^= uint32(lbelow6)
   964  	m11 += uint32(lbelow5)
   965  
   966  	m11 <<= 8
   967  	m1 += int64(m10)
   968  
   969  	m1 += int64(m11)
   970  	m12 &^= uint32(lbelow7)
   971  
   972  	lbelow6 -= lbelow7
   973  	m13 &^= uint32(lbelow8)
   974  
   975  	m12 += uint32(lbelow6)
   976  	lbelow7 -= lbelow8
   977  
   978  	m12 <<= 16
   979  	m13 += uint32(lbelow7)
   980  
   981  	m13 <<= 24
   982  	m1 += int64(m12)
   983  
   984  	m1 += int64(m13)
   985  	m2 = 2279
   986  
   987  	lbelow9 = l - 9
   988  	m3 = 2343
   989  
   990  	lbelow10 = l - 10
   991  	lbelow11 = l - 11
   992  
   993  	lbelow9 >>= 31
   994  	lbelow12 = l - 12
   995  
   996  	lbelow10 >>= 31
   997  	p += lbelow9
   998  
   999  	m20 = uint32(m[p+8])
  1000  	lbelow11 >>= 31
  1001  	p += lbelow10
  1002  
  1003  	m21 = uint32(m[p+9])
  1004  	lbelow12 >>= 31
  1005  	p += lbelow11
  1006  
  1007  	m22 = uint32(m[p+10])
  1008  	m2 <<= 51
  1009  	p += lbelow12
  1010  
  1011  	m23 = uint32(m[p+11])
  1012  	m20 &^= uint32(lbelow9)
  1013  	lbelow8 -= lbelow9
  1014  
  1015  	m20 += uint32(lbelow8)
  1016  	lbelow9 -= lbelow10
  1017  
  1018  	m21 &^= uint32(lbelow10)
  1019  	m21 += uint32(lbelow9)
  1020  
  1021  	m21 <<= 8
  1022  	m2 += int64(m20)
  1023  
  1024  	m2 += int64(m21)
  1025  	m22 &^= uint32(lbelow11)
  1026  
  1027  	lbelow10 -= lbelow11
  1028  	m23 &^= uint32(lbelow12)
  1029  
  1030  	m22 += uint32(lbelow10)
  1031  	lbelow11 -= lbelow12
  1032  
  1033  	m22 <<= 16
  1034  	m23 += uint32(lbelow11)
  1035  
  1036  	m23 <<= 24
  1037  	m2 += int64(m22)
  1038  
  1039  	m3 <<= 51
  1040  	lbelow13 = l - 13
  1041  
  1042  	lbelow13 >>= 31
  1043  	lbelow14 = l - 14
  1044  
  1045  	lbelow14 >>= 31
  1046  	p += lbelow13
  1047  	lbelow15 = l - 15
  1048  
  1049  	m30 = uint32(m[p+12])
  1050  	lbelow15 >>= 31
  1051  	p += lbelow14
  1052  
  1053  	m31 = uint32(m[p+13])
  1054  	p += lbelow15
  1055  	m2 += int64(m23)
  1056  
  1057  	m32 = uint32(m[p+14])
  1058  	m30 &^= uint32(lbelow13)
  1059  	lbelow12 -= lbelow13
  1060  
  1061  	m30 += uint32(lbelow12)
  1062  	lbelow13 -= lbelow14
  1063  
  1064  	m3 += int64(m30)
  1065  	m31 &^= uint32(lbelow14)
  1066  
  1067  	m31 += uint32(lbelow13)
  1068  	m32 &^= uint32(lbelow15)
  1069  
  1070  	m31 <<= 8
  1071  	lbelow14 -= lbelow15
  1072  
  1073  	m3 += int64(m31)
  1074  	m32 += uint32(lbelow14)
  1075  	d0 = m0
  1076  
  1077  	m32 <<= 16
  1078  	m33 = uint64(lbelow15 + 1)
  1079  	d1 = m1
  1080  
  1081  	m33 <<= 24
  1082  	m3 += int64(m32)
  1083  	d2 = m2
  1084  
  1085  	m3 += int64(m33)
  1086  	d3 = m3
  1087  
  1088  	z3 = math.Float64frombits(uint64(d3))
  1089  
  1090  	z2 = math.Float64frombits(uint64(d2))
  1091  
  1092  	z1 = math.Float64frombits(uint64(d1))
  1093  
  1094  	z0 = math.Float64frombits(uint64(d0))
  1095  
  1096  	z3 -= alpha96
  1097  
  1098  	z2 -= alpha64
  1099  
  1100  	z1 -= alpha32
  1101  
  1102  	z0 -= alpha0
  1103  
  1104  	h5 += z3
  1105  
  1106  	h3 += z2
  1107  
  1108  	h1 += z1
  1109  
  1110  	h0 += z0
  1111  
  1112  	y7 = h7 + alpha130
  1113  
  1114  	y6 = h6 + alpha130
  1115  
  1116  	y1 = h1 + alpha32
  1117  
  1118  	y0 = h0 + alpha32
  1119  
  1120  	y7 -= alpha130
  1121  
  1122  	y6 -= alpha130
  1123  
  1124  	y1 -= alpha32
  1125  
  1126  	y0 -= alpha32
  1127  
  1128  	y5 = h5 + alpha96
  1129  
  1130  	y4 = h4 + alpha96
  1131  
  1132  	x7 = h7 - y7
  1133  	y7 *= scale
  1134  
  1135  	x6 = h6 - y6
  1136  	y6 *= scale
  1137  
  1138  	x1 = h1 - y1
  1139  
  1140  	x0 = h0 - y0
  1141  
  1142  	y5 -= alpha96
  1143  
  1144  	y4 -= alpha96
  1145  
  1146  	x1 += y7
  1147  
  1148  	x0 += y6
  1149  
  1150  	x7 += y5
  1151  
  1152  	x6 += y4
  1153  
  1154  	y3 = h3 + alpha64
  1155  
  1156  	y2 = h2 + alpha64
  1157  
  1158  	x0 += x1
  1159  
  1160  	x6 += x7
  1161  
  1162  	y3 -= alpha64
  1163  	r3low = r3low_stack
  1164  
  1165  	y2 -= alpha64
  1166  	r0low = r0low_stack
  1167  
  1168  	x5 = h5 - y5
  1169  	r3lowx0 = r3low * x0
  1170  	r3high = r3high_stack
  1171  
  1172  	x4 = h4 - y4
  1173  	r0lowx6 = r0low * x6
  1174  	r0high = r0high_stack
  1175  
  1176  	x3 = h3 - y3
  1177  	r3highx0 = r3high * x0
  1178  	sr1low = sr1low_stack
  1179  
  1180  	x2 = h2 - y2
  1181  	r0highx6 = r0high * x6
  1182  	sr1high = sr1high_stack
  1183  
  1184  	x5 += y3
  1185  	r0lowx0 = r0low * x0
  1186  	r1low = r1low_stack
  1187  
  1188  	h6 = r3lowx0 + r0lowx6
  1189  	sr1lowx6 = sr1low * x6
  1190  	r1high = r1high_stack
  1191  
  1192  	x4 += y2
  1193  	r0highx0 = r0high * x0
  1194  	sr2low = sr2low_stack
  1195  
  1196  	h7 = r3highx0 + r0highx6
  1197  	sr1highx6 = sr1high * x6
  1198  	sr2high = sr2high_stack
  1199  
  1200  	x3 += y1
  1201  	r1lowx0 = r1low * x0
  1202  	r2low = r2low_stack
  1203  
  1204  	h0 = r0lowx0 + sr1lowx6
  1205  	sr2lowx6 = sr2low * x6
  1206  	r2high = r2high_stack
  1207  
  1208  	x2 += y0
  1209  	r1highx0 = r1high * x0
  1210  	sr3low = sr3low_stack
  1211  
  1212  	h1 = r0highx0 + sr1highx6
  1213  	sr2highx6 = sr2high * x6
  1214  	sr3high = sr3high_stack
  1215  
  1216  	x4 += x5
  1217  	r2lowx0 = r2low * x0
  1218  
  1219  	h2 = r1lowx0 + sr2lowx6
  1220  	sr3lowx6 = sr3low * x6
  1221  
  1222  	x2 += x3
  1223  	r2highx0 = r2high * x0
  1224  
  1225  	h3 = r1highx0 + sr2highx6
  1226  	sr3highx6 = sr3high * x6
  1227  
  1228  	r1highx4 = r1high * x4
  1229  
  1230  	h4 = r2lowx0 + sr3lowx6
  1231  	r1lowx4 = r1low * x4
  1232  
  1233  	r0highx4 = r0high * x4
  1234  
  1235  	h5 = r2highx0 + sr3highx6
  1236  	r0lowx4 = r0low * x4
  1237  
  1238  	h7 += r1highx4
  1239  	sr3highx4 = sr3high * x4
  1240  
  1241  	h6 += r1lowx4
  1242  	sr3lowx4 = sr3low * x4
  1243  
  1244  	h5 += r0highx4
  1245  	sr2highx4 = sr2high * x4
  1246  
  1247  	h4 += r0lowx4
  1248  	sr2lowx4 = sr2low * x4
  1249  
  1250  	h3 += sr3highx4
  1251  	r0lowx2 = r0low * x2
  1252  
  1253  	h2 += sr3lowx4
  1254  	r0highx2 = r0high * x2
  1255  
  1256  	h1 += sr2highx4
  1257  	r1lowx2 = r1low * x2
  1258  
  1259  	h0 += sr2lowx4
  1260  	r1highx2 = r1high * x2
  1261  
  1262  	h2 += r0lowx2
  1263  	r2lowx2 = r2low * x2
  1264  
  1265  	h3 += r0highx2
  1266  	r2highx2 = r2high * x2
  1267  
  1268  	h4 += r1lowx2
  1269  	sr3lowx2 = sr3low * x2
  1270  
  1271  	h5 += r1highx2
  1272  	sr3highx2 = sr3high * x2
  1273  
  1274  	h6 += r2lowx2
  1275  
  1276  	h7 += r2highx2
  1277  
  1278  	h0 += sr3lowx2
  1279  
  1280  	h1 += sr3highx2
  1281  
  1282  nomorebytes:
  1283  
  1284  	y7 = h7 + alpha130
  1285  
  1286  	y0 = h0 + alpha32
  1287  
  1288  	y1 = h1 + alpha32
  1289  
  1290  	y2 = h2 + alpha64
  1291  
  1292  	y7 -= alpha130
  1293  
  1294  	y3 = h3 + alpha64
  1295  
  1296  	y4 = h4 + alpha96
  1297  
  1298  	y5 = h5 + alpha96
  1299  
  1300  	x7 = h7 - y7
  1301  	y7 *= scale
  1302  
  1303  	y0 -= alpha32
  1304  
  1305  	y1 -= alpha32
  1306  
  1307  	y2 -= alpha64
  1308  
  1309  	h6 += x7
  1310  
  1311  	y3 -= alpha64
  1312  
  1313  	y4 -= alpha96
  1314  
  1315  	y5 -= alpha96
  1316  
  1317  	y6 = h6 + alpha130
  1318  
  1319  	x0 = h0 - y0
  1320  
  1321  	x1 = h1 - y1
  1322  
  1323  	x2 = h2 - y2
  1324  
  1325  	y6 -= alpha130
  1326  
  1327  	x0 += y7
  1328  
  1329  	x3 = h3 - y3
  1330  
  1331  	x4 = h4 - y4
  1332  
  1333  	x5 = h5 - y5
  1334  
  1335  	x6 = h6 - y6
  1336  
  1337  	y6 *= scale
  1338  
  1339  	x2 += y0
  1340  
  1341  	x3 += y1
  1342  
  1343  	x4 += y2
  1344  
  1345  	x0 += y6
  1346  
  1347  	x5 += y3
  1348  
  1349  	x6 += y4
  1350  
  1351  	x2 += x3
  1352  
  1353  	x0 += x1
  1354  
  1355  	x4 += x5
  1356  
  1357  	x6 += y5
  1358  
  1359  	x2 += offset1
  1360  	d1 = int64(math.Float64bits(x2))
  1361  
  1362  	x0 += offset0
  1363  	d0 = int64(math.Float64bits(x0))
  1364  
  1365  	x4 += offset2
  1366  	d2 = int64(math.Float64bits(x4))
  1367  
  1368  	x6 += offset3
  1369  	d3 = int64(math.Float64bits(x6))
  1370  
  1371  	f0 = uint64(d0)
  1372  
  1373  	f1 = uint64(d1)
  1374  	bits32 = math.MaxUint64
  1375  
  1376  	f2 = uint64(d2)
  1377  	bits32 >>= 32
  1378  
  1379  	f3 = uint64(d3)
  1380  	f = f0 >> 32
  1381  
  1382  	f0 &= bits32
  1383  	f &= 255
  1384  
  1385  	f1 += f
  1386  	g0 = f0 + 5
  1387  
  1388  	g = g0 >> 32
  1389  	g0 &= bits32
  1390  
  1391  	f = f1 >> 32
  1392  	f1 &= bits32
  1393  
  1394  	f &= 255
  1395  	g1 = f1 + g
  1396  
  1397  	g = g1 >> 32
  1398  	f2 += f
  1399  
  1400  	f = f2 >> 32
  1401  	g1 &= bits32
  1402  
  1403  	f2 &= bits32
  1404  	f &= 255
  1405  
  1406  	f3 += f
  1407  	g2 = f2 + g
  1408  
  1409  	g = g2 >> 32
  1410  	g2 &= bits32
  1411  
  1412  	f4 = f3 >> 32
  1413  	f3 &= bits32
  1414  
  1415  	f4 &= 255
  1416  	g3 = f3 + g
  1417  
  1418  	g = g3 >> 32
  1419  	g3 &= bits32
  1420  
  1421  	g4 = f4 + g
  1422  
  1423  	g4 = g4 - 4
  1424  	s00 = uint32(s[0])
  1425  
  1426  	f = uint64(int64(g4) >> 63)
  1427  	s01 = uint32(s[1])
  1428  
  1429  	f0 &= f
  1430  	g0 &^= f
  1431  	s02 = uint32(s[2])
  1432  
  1433  	f1 &= f
  1434  	f0 |= g0
  1435  	s03 = uint32(s[3])
  1436  
  1437  	g1 &^= f
  1438  	f2 &= f
  1439  	s10 = uint32(s[4])
  1440  
  1441  	f3 &= f
  1442  	g2 &^= f
  1443  	s11 = uint32(s[5])
  1444  
  1445  	g3 &^= f
  1446  	f1 |= g1
  1447  	s12 = uint32(s[6])
  1448  
  1449  	f2 |= g2
  1450  	f3 |= g3
  1451  	s13 = uint32(s[7])
  1452  
  1453  	s01 <<= 8
  1454  	f0 += uint64(s00)
  1455  	s20 = uint32(s[8])
  1456  
  1457  	s02 <<= 16
  1458  	f0 += uint64(s01)
  1459  	s21 = uint32(s[9])
  1460  
  1461  	s03 <<= 24
  1462  	f0 += uint64(s02)
  1463  	s22 = uint32(s[10])
  1464  
  1465  	s11 <<= 8
  1466  	f1 += uint64(s10)
  1467  	s23 = uint32(s[11])
  1468  
  1469  	s12 <<= 16
  1470  	f1 += uint64(s11)
  1471  	s30 = uint32(s[12])
  1472  
  1473  	s13 <<= 24
  1474  	f1 += uint64(s12)
  1475  	s31 = uint32(s[13])
  1476  
  1477  	f0 += uint64(s03)
  1478  	f1 += uint64(s13)
  1479  	s32 = uint32(s[14])
  1480  
  1481  	s21 <<= 8
  1482  	f2 += uint64(s20)
  1483  	s33 = uint32(s[15])
  1484  
  1485  	s22 <<= 16
  1486  	f2 += uint64(s21)
  1487  
  1488  	s23 <<= 24
  1489  	f2 += uint64(s22)
  1490  
  1491  	s31 <<= 8
  1492  	f3 += uint64(s30)
  1493  
  1494  	s32 <<= 16
  1495  	f3 += uint64(s31)
  1496  
  1497  	s33 <<= 24
  1498  	f3 += uint64(s32)
  1499  
  1500  	f2 += uint64(s23)
  1501  	f3 += uint64(s33)
  1502  
  1503  	out[0] = byte(f0)
  1504  	f0 >>= 8
  1505  	out[1] = byte(f0)
  1506  	f0 >>= 8
  1507  	out[2] = byte(f0)
  1508  	f0 >>= 8
  1509  	out[3] = byte(f0)
  1510  	f0 >>= 8
  1511  	f1 += f0
  1512  
  1513  	out[4] = byte(f1)
  1514  	f1 >>= 8
  1515  	out[5] = byte(f1)
  1516  	f1 >>= 8
  1517  	out[6] = byte(f1)
  1518  	f1 >>= 8
  1519  	out[7] = byte(f1)
  1520  	f1 >>= 8
  1521  	f2 += f1
  1522  
  1523  	out[8] = byte(f2)
  1524  	f2 >>= 8
  1525  	out[9] = byte(f2)
  1526  	f2 >>= 8
  1527  	out[10] = byte(f2)
  1528  	f2 >>= 8
  1529  	out[11] = byte(f2)
  1530  	f2 >>= 8
  1531  	f3 += f2
  1532  
  1533  	out[12] = byte(f3)
  1534  	f3 >>= 8
  1535  	out[13] = byte(f3)
  1536  	f3 >>= 8
  1537  	out[14] = byte(f3)
  1538  	f3 >>= 8
  1539  	out[15] = byte(f3)
  1540  }