github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/blas/gonum/level2float32.go (about)

     1  // Code generated by "go generate github.com/jingcheng-WU/gonum/blas/gonum”; DO NOT EDIT.
     2  
     3  // Copyright ©2014 The Gonum Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package gonum
     8  
     9  import (
    10  	"github.com/jingcheng-WU/gonum/blas"
    11  	"github.com/jingcheng-WU/gonum/internal/asm/f32"
    12  )
    13  
    14  var _ blas.Float32Level2 = Implementation{}
    15  
    16  // Sger performs the rank-one operation
    17  //  A += alpha * x * yᵀ
    18  // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
    19  //
    20  // Float32 implementations are autogenerated and not directly tested.
    21  func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) {
    22  	if m < 0 {
    23  		panic(mLT0)
    24  	}
    25  	if n < 0 {
    26  		panic(nLT0)
    27  	}
    28  	if lda < max(1, n) {
    29  		panic(badLdA)
    30  	}
    31  	if incX == 0 {
    32  		panic(zeroIncX)
    33  	}
    34  	if incY == 0 {
    35  		panic(zeroIncY)
    36  	}
    37  
    38  	// Quick return if possible.
    39  	if m == 0 || n == 0 {
    40  		return
    41  	}
    42  
    43  	// For zero matrix size the following slice length checks are trivially satisfied.
    44  	if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) {
    45  		panic(shortX)
    46  	}
    47  	if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
    48  		panic(shortY)
    49  	}
    50  	if len(a) < lda*(m-1)+n {
    51  		panic(shortA)
    52  	}
    53  
    54  	// Quick return if possible.
    55  	if alpha == 0 {
    56  		return
    57  	}
    58  	f32.Ger(uintptr(m), uintptr(n),
    59  		alpha,
    60  		x, uintptr(incX),
    61  		y, uintptr(incY),
    62  		a, uintptr(lda))
    63  }
    64  
    65  // Sgbmv performs one of the matrix-vector operations
    66  //  y = alpha * A * x + beta * y   if tA == blas.NoTrans
    67  //  y = alpha * Aᵀ * x + beta * y  if tA == blas.Trans or blas.ConjTrans
    68  // where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals,
    69  // x and y are vectors, and alpha and beta are scalars.
    70  //
    71  // Float32 implementations are autogenerated and not directly tested.
    72  func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
    73  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
    74  		panic(badTranspose)
    75  	}
    76  	if m < 0 {
    77  		panic(mLT0)
    78  	}
    79  	if n < 0 {
    80  		panic(nLT0)
    81  	}
    82  	if kL < 0 {
    83  		panic(kLLT0)
    84  	}
    85  	if kU < 0 {
    86  		panic(kULT0)
    87  	}
    88  	if lda < kL+kU+1 {
    89  		panic(badLdA)
    90  	}
    91  	if incX == 0 {
    92  		panic(zeroIncX)
    93  	}
    94  	if incY == 0 {
    95  		panic(zeroIncY)
    96  	}
    97  
    98  	// Quick return if possible.
    99  	if m == 0 || n == 0 {
   100  		return
   101  	}
   102  
   103  	// For zero matrix size the following slice length checks are trivially satisfied.
   104  	if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 {
   105  		panic(shortA)
   106  	}
   107  	lenX := m
   108  	lenY := n
   109  	if tA == blas.NoTrans {
   110  		lenX = n
   111  		lenY = m
   112  	}
   113  	if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) {
   114  		panic(shortX)
   115  	}
   116  	if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) {
   117  		panic(shortY)
   118  	}
   119  
   120  	// Quick return if possible.
   121  	if alpha == 0 && beta == 1 {
   122  		return
   123  	}
   124  
   125  	var kx, ky int
   126  	if incX < 0 {
   127  		kx = -(lenX - 1) * incX
   128  	}
   129  	if incY < 0 {
   130  		ky = -(lenY - 1) * incY
   131  	}
   132  
   133  	// Form y = beta * y.
   134  	if beta != 1 {
   135  		if incY == 1 {
   136  			if beta == 0 {
   137  				for i := range y[:lenY] {
   138  					y[i] = 0
   139  				}
   140  			} else {
   141  				f32.ScalUnitary(beta, y[:lenY])
   142  			}
   143  		} else {
   144  			iy := ky
   145  			if beta == 0 {
   146  				for i := 0; i < lenY; i++ {
   147  					y[iy] = 0
   148  					iy += incY
   149  				}
   150  			} else {
   151  				if incY > 0 {
   152  					f32.ScalInc(beta, y, uintptr(lenY), uintptr(incY))
   153  				} else {
   154  					f32.ScalInc(beta, y, uintptr(lenY), uintptr(-incY))
   155  				}
   156  			}
   157  		}
   158  	}
   159  
   160  	if alpha == 0 {
   161  		return
   162  	}
   163  
   164  	// i and j are indices of the compacted banded matrix.
   165  	// off is the offset into the dense matrix (off + j = densej)
   166  	nCol := kU + 1 + kL
   167  	if tA == blas.NoTrans {
   168  		iy := ky
   169  		if incX == 1 {
   170  			for i := 0; i < min(m, n+kL); i++ {
   171  				l := max(0, kL-i)
   172  				u := min(nCol, n+kL-i)
   173  				off := max(0, i-kL)
   174  				atmp := a[i*lda+l : i*lda+u]
   175  				xtmp := x[off : off+u-l]
   176  				var sum float32
   177  				for j, v := range atmp {
   178  					sum += xtmp[j] * v
   179  				}
   180  				y[iy] += sum * alpha
   181  				iy += incY
   182  			}
   183  			return
   184  		}
   185  		for i := 0; i < min(m, n+kL); i++ {
   186  			l := max(0, kL-i)
   187  			u := min(nCol, n+kL-i)
   188  			off := max(0, i-kL)
   189  			atmp := a[i*lda+l : i*lda+u]
   190  			jx := kx
   191  			var sum float32
   192  			for _, v := range atmp {
   193  				sum += x[off*incX+jx] * v
   194  				jx += incX
   195  			}
   196  			y[iy] += sum * alpha
   197  			iy += incY
   198  		}
   199  		return
   200  	}
   201  	if incX == 1 {
   202  		for i := 0; i < min(m, n+kL); i++ {
   203  			l := max(0, kL-i)
   204  			u := min(nCol, n+kL-i)
   205  			off := max(0, i-kL)
   206  			atmp := a[i*lda+l : i*lda+u]
   207  			tmp := alpha * x[i]
   208  			jy := ky
   209  			for _, v := range atmp {
   210  				y[jy+off*incY] += tmp * v
   211  				jy += incY
   212  			}
   213  		}
   214  		return
   215  	}
   216  	ix := kx
   217  	for i := 0; i < min(m, n+kL); i++ {
   218  		l := max(0, kL-i)
   219  		u := min(nCol, n+kL-i)
   220  		off := max(0, i-kL)
   221  		atmp := a[i*lda+l : i*lda+u]
   222  		tmp := alpha * x[ix]
   223  		jy := ky
   224  		for _, v := range atmp {
   225  			y[jy+off*incY] += tmp * v
   226  			jy += incY
   227  		}
   228  		ix += incX
   229  	}
   230  }
   231  
   232  // Strmv performs one of the matrix-vector operations
   233  //  x = A * x   if tA == blas.NoTrans
   234  //  x = Aᵀ * x  if tA == blas.Trans or blas.ConjTrans
   235  // where A is an n×n triangular matrix, and x is a vector.
   236  //
   237  // Float32 implementations are autogenerated and not directly tested.
   238  func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) {
   239  	if ul != blas.Lower && ul != blas.Upper {
   240  		panic(badUplo)
   241  	}
   242  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
   243  		panic(badTranspose)
   244  	}
   245  	if d != blas.NonUnit && d != blas.Unit {
   246  		panic(badDiag)
   247  	}
   248  	if n < 0 {
   249  		panic(nLT0)
   250  	}
   251  	if lda < max(1, n) {
   252  		panic(badLdA)
   253  	}
   254  	if incX == 0 {
   255  		panic(zeroIncX)
   256  	}
   257  
   258  	// Quick return if possible.
   259  	if n == 0 {
   260  		return
   261  	}
   262  
   263  	// For zero matrix size the following slice length checks are trivially satisfied.
   264  	if len(a) < lda*(n-1)+n {
   265  		panic(shortA)
   266  	}
   267  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
   268  		panic(shortX)
   269  	}
   270  
   271  	nonUnit := d != blas.Unit
   272  	if n == 1 {
   273  		if nonUnit {
   274  			x[0] *= a[0]
   275  		}
   276  		return
   277  	}
   278  	var kx int
   279  	if incX <= 0 {
   280  		kx = -(n - 1) * incX
   281  	}
   282  	if tA == blas.NoTrans {
   283  		if ul == blas.Upper {
   284  			if incX == 1 {
   285  				for i := 0; i < n; i++ {
   286  					ilda := i * lda
   287  					var tmp float32
   288  					if nonUnit {
   289  						tmp = a[ilda+i] * x[i]
   290  					} else {
   291  						tmp = x[i]
   292  					}
   293  					x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n])
   294  				}
   295  				return
   296  			}
   297  			ix := kx
   298  			for i := 0; i < n; i++ {
   299  				ilda := i * lda
   300  				var tmp float32
   301  				if nonUnit {
   302  					tmp = a[ilda+i] * x[ix]
   303  				} else {
   304  					tmp = x[ix]
   305  				}
   306  				x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0)
   307  				ix += incX
   308  			}
   309  			return
   310  		}
   311  		if incX == 1 {
   312  			for i := n - 1; i >= 0; i-- {
   313  				ilda := i * lda
   314  				var tmp float32
   315  				if nonUnit {
   316  					tmp += a[ilda+i] * x[i]
   317  				} else {
   318  					tmp = x[i]
   319  				}
   320  				x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x[:i])
   321  			}
   322  			return
   323  		}
   324  		ix := kx + (n-1)*incX
   325  		for i := n - 1; i >= 0; i-- {
   326  			ilda := i * lda
   327  			var tmp float32
   328  			if nonUnit {
   329  				tmp = a[ilda+i] * x[ix]
   330  			} else {
   331  				tmp = x[ix]
   332  			}
   333  			x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0)
   334  			ix -= incX
   335  		}
   336  		return
   337  	}
   338  	// Cases where a is transposed.
   339  	if ul == blas.Upper {
   340  		if incX == 1 {
   341  			for i := n - 1; i >= 0; i-- {
   342  				ilda := i * lda
   343  				xi := x[i]
   344  				f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n])
   345  				if nonUnit {
   346  					x[i] *= a[ilda+i]
   347  				}
   348  			}
   349  			return
   350  		}
   351  		ix := kx + (n-1)*incX
   352  		for i := n - 1; i >= 0; i-- {
   353  			ilda := i * lda
   354  			xi := x[ix]
   355  			f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX))
   356  			if nonUnit {
   357  				x[ix] *= a[ilda+i]
   358  			}
   359  			ix -= incX
   360  		}
   361  		return
   362  	}
   363  	if incX == 1 {
   364  		for i := 0; i < n; i++ {
   365  			ilda := i * lda
   366  			xi := x[i]
   367  			f32.AxpyUnitary(xi, a[ilda:ilda+i], x[:i])
   368  			if nonUnit {
   369  				x[i] *= a[i*lda+i]
   370  			}
   371  		}
   372  		return
   373  	}
   374  	ix := kx
   375  	for i := 0; i < n; i++ {
   376  		ilda := i * lda
   377  		xi := x[ix]
   378  		f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx))
   379  		if nonUnit {
   380  			x[ix] *= a[ilda+i]
   381  		}
   382  		ix += incX
   383  	}
   384  }
   385  
   386  // Strsv solves one of the systems of equations
   387  //  A * x = b   if tA == blas.NoTrans
   388  //  Aᵀ * x = b  if tA == blas.Trans or blas.ConjTrans
   389  // where A is an n×n triangular matrix, and x and b are vectors.
   390  //
   391  // At entry to the function, x contains the values of b, and the result is
   392  // stored in-place into x.
   393  //
   394  // No test for singularity or near-singularity is included in this
   395  // routine. Such tests must be performed before calling this routine.
   396  //
   397  // Float32 implementations are autogenerated and not directly tested.
   398  func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) {
   399  	if ul != blas.Lower && ul != blas.Upper {
   400  		panic(badUplo)
   401  	}
   402  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
   403  		panic(badTranspose)
   404  	}
   405  	if d != blas.NonUnit && d != blas.Unit {
   406  		panic(badDiag)
   407  	}
   408  	if n < 0 {
   409  		panic(nLT0)
   410  	}
   411  	if lda < max(1, n) {
   412  		panic(badLdA)
   413  	}
   414  	if incX == 0 {
   415  		panic(zeroIncX)
   416  	}
   417  
   418  	// Quick return if possible.
   419  	if n == 0 {
   420  		return
   421  	}
   422  
   423  	// For zero matrix size the following slice length checks are trivially satisfied.
   424  	if len(a) < lda*(n-1)+n {
   425  		panic(shortA)
   426  	}
   427  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
   428  		panic(shortX)
   429  	}
   430  
   431  	if n == 1 {
   432  		if d == blas.NonUnit {
   433  			x[0] /= a[0]
   434  		}
   435  		return
   436  	}
   437  
   438  	var kx int
   439  	if incX < 0 {
   440  		kx = -(n - 1) * incX
   441  	}
   442  	nonUnit := d == blas.NonUnit
   443  	if tA == blas.NoTrans {
   444  		if ul == blas.Upper {
   445  			if incX == 1 {
   446  				for i := n - 1; i >= 0; i-- {
   447  					var sum float32
   448  					atmp := a[i*lda+i+1 : i*lda+n]
   449  					for j, v := range atmp {
   450  						jv := i + j + 1
   451  						sum += x[jv] * v
   452  					}
   453  					x[i] -= sum
   454  					if nonUnit {
   455  						x[i] /= a[i*lda+i]
   456  					}
   457  				}
   458  				return
   459  			}
   460  			ix := kx + (n-1)*incX
   461  			for i := n - 1; i >= 0; i-- {
   462  				var sum float32
   463  				jx := ix + incX
   464  				atmp := a[i*lda+i+1 : i*lda+n]
   465  				for _, v := range atmp {
   466  					sum += x[jx] * v
   467  					jx += incX
   468  				}
   469  				x[ix] -= sum
   470  				if nonUnit {
   471  					x[ix] /= a[i*lda+i]
   472  				}
   473  				ix -= incX
   474  			}
   475  			return
   476  		}
   477  		if incX == 1 {
   478  			for i := 0; i < n; i++ {
   479  				var sum float32
   480  				atmp := a[i*lda : i*lda+i]
   481  				for j, v := range atmp {
   482  					sum += x[j] * v
   483  				}
   484  				x[i] -= sum
   485  				if nonUnit {
   486  					x[i] /= a[i*lda+i]
   487  				}
   488  			}
   489  			return
   490  		}
   491  		ix := kx
   492  		for i := 0; i < n; i++ {
   493  			jx := kx
   494  			var sum float32
   495  			atmp := a[i*lda : i*lda+i]
   496  			for _, v := range atmp {
   497  				sum += x[jx] * v
   498  				jx += incX
   499  			}
   500  			x[ix] -= sum
   501  			if nonUnit {
   502  				x[ix] /= a[i*lda+i]
   503  			}
   504  			ix += incX
   505  		}
   506  		return
   507  	}
   508  	// Cases where a is transposed.
   509  	if ul == blas.Upper {
   510  		if incX == 1 {
   511  			for i := 0; i < n; i++ {
   512  				if nonUnit {
   513  					x[i] /= a[i*lda+i]
   514  				}
   515  				xi := x[i]
   516  				atmp := a[i*lda+i+1 : i*lda+n]
   517  				for j, v := range atmp {
   518  					jv := j + i + 1
   519  					x[jv] -= v * xi
   520  				}
   521  			}
   522  			return
   523  		}
   524  		ix := kx
   525  		for i := 0; i < n; i++ {
   526  			if nonUnit {
   527  				x[ix] /= a[i*lda+i]
   528  			}
   529  			xi := x[ix]
   530  			jx := kx + (i+1)*incX
   531  			atmp := a[i*lda+i+1 : i*lda+n]
   532  			for _, v := range atmp {
   533  				x[jx] -= v * xi
   534  				jx += incX
   535  			}
   536  			ix += incX
   537  		}
   538  		return
   539  	}
   540  	if incX == 1 {
   541  		for i := n - 1; i >= 0; i-- {
   542  			if nonUnit {
   543  				x[i] /= a[i*lda+i]
   544  			}
   545  			xi := x[i]
   546  			atmp := a[i*lda : i*lda+i]
   547  			for j, v := range atmp {
   548  				x[j] -= v * xi
   549  			}
   550  		}
   551  		return
   552  	}
   553  	ix := kx + (n-1)*incX
   554  	for i := n - 1; i >= 0; i-- {
   555  		if nonUnit {
   556  			x[ix] /= a[i*lda+i]
   557  		}
   558  		xi := x[ix]
   559  		jx := kx
   560  		atmp := a[i*lda : i*lda+i]
   561  		for _, v := range atmp {
   562  			x[jx] -= v * xi
   563  			jx += incX
   564  		}
   565  		ix -= incX
   566  	}
   567  }
   568  
   569  // Ssymv performs the matrix-vector operation
   570  //  y = alpha * A * x + beta * y
   571  // where A is an n×n symmetric matrix, x and y are vectors, and alpha and
   572  // beta are scalars.
   573  //
   574  // Float32 implementations are autogenerated and not directly tested.
   575  func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
   576  	if ul != blas.Lower && ul != blas.Upper {
   577  		panic(badUplo)
   578  	}
   579  	if n < 0 {
   580  		panic(nLT0)
   581  	}
   582  	if lda < max(1, n) {
   583  		panic(badLdA)
   584  	}
   585  	if incX == 0 {
   586  		panic(zeroIncX)
   587  	}
   588  	if incY == 0 {
   589  		panic(zeroIncY)
   590  	}
   591  
   592  	// Quick return if possible.
   593  	if n == 0 {
   594  		return
   595  	}
   596  
   597  	// For zero matrix size the following slice length checks are trivially satisfied.
   598  	if len(a) < lda*(n-1)+n {
   599  		panic(shortA)
   600  	}
   601  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
   602  		panic(shortX)
   603  	}
   604  	if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
   605  		panic(shortY)
   606  	}
   607  
   608  	// Quick return if possible.
   609  	if alpha == 0 && beta == 1 {
   610  		return
   611  	}
   612  
   613  	// Set up start points
   614  	var kx, ky int
   615  	if incX < 0 {
   616  		kx = -(n - 1) * incX
   617  	}
   618  	if incY < 0 {
   619  		ky = -(n - 1) * incY
   620  	}
   621  
   622  	// Form y = beta * y
   623  	if beta != 1 {
   624  		if incY == 1 {
   625  			if beta == 0 {
   626  				for i := range y[:n] {
   627  					y[i] = 0
   628  				}
   629  			} else {
   630  				f32.ScalUnitary(beta, y[:n])
   631  			}
   632  		} else {
   633  			iy := ky
   634  			if beta == 0 {
   635  				for i := 0; i < n; i++ {
   636  					y[iy] = 0
   637  					iy += incY
   638  				}
   639  			} else {
   640  				if incY > 0 {
   641  					f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
   642  				} else {
   643  					f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
   644  				}
   645  			}
   646  		}
   647  	}
   648  
   649  	if alpha == 0 {
   650  		return
   651  	}
   652  
   653  	if n == 1 {
   654  		y[0] += alpha * a[0] * x[0]
   655  		return
   656  	}
   657  
   658  	if ul == blas.Upper {
   659  		if incX == 1 {
   660  			iy := ky
   661  			for i := 0; i < n; i++ {
   662  				xv := x[i] * alpha
   663  				sum := x[i] * a[i*lda+i]
   664  				jy := ky + (i+1)*incY
   665  				atmp := a[i*lda+i+1 : i*lda+n]
   666  				for j, v := range atmp {
   667  					jp := j + i + 1
   668  					sum += x[jp] * v
   669  					y[jy] += xv * v
   670  					jy += incY
   671  				}
   672  				y[iy] += alpha * sum
   673  				iy += incY
   674  			}
   675  			return
   676  		}
   677  		ix := kx
   678  		iy := ky
   679  		for i := 0; i < n; i++ {
   680  			xv := x[ix] * alpha
   681  			sum := x[ix] * a[i*lda+i]
   682  			jx := kx + (i+1)*incX
   683  			jy := ky + (i+1)*incY
   684  			atmp := a[i*lda+i+1 : i*lda+n]
   685  			for _, v := range atmp {
   686  				sum += x[jx] * v
   687  				y[jy] += xv * v
   688  				jx += incX
   689  				jy += incY
   690  			}
   691  			y[iy] += alpha * sum
   692  			ix += incX
   693  			iy += incY
   694  		}
   695  		return
   696  	}
   697  	// Cases where a is lower triangular.
   698  	if incX == 1 {
   699  		iy := ky
   700  		for i := 0; i < n; i++ {
   701  			jy := ky
   702  			xv := alpha * x[i]
   703  			atmp := a[i*lda : i*lda+i]
   704  			var sum float32
   705  			for j, v := range atmp {
   706  				sum += x[j] * v
   707  				y[jy] += xv * v
   708  				jy += incY
   709  			}
   710  			sum += x[i] * a[i*lda+i]
   711  			sum *= alpha
   712  			y[iy] += sum
   713  			iy += incY
   714  		}
   715  		return
   716  	}
   717  	ix := kx
   718  	iy := ky
   719  	for i := 0; i < n; i++ {
   720  		jx := kx
   721  		jy := ky
   722  		xv := alpha * x[ix]
   723  		atmp := a[i*lda : i*lda+i]
   724  		var sum float32
   725  		for _, v := range atmp {
   726  			sum += x[jx] * v
   727  			y[jy] += xv * v
   728  			jx += incX
   729  			jy += incY
   730  		}
   731  		sum += x[ix] * a[i*lda+i]
   732  		sum *= alpha
   733  		y[iy] += sum
   734  		ix += incX
   735  		iy += incY
   736  	}
   737  }
   738  
   739  // Stbmv performs one of the matrix-vector operations
   740  //  x = A * x   if tA == blas.NoTrans
   741  //  x = Aᵀ * x  if tA == blas.Trans or blas.ConjTrans
   742  // where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector.
   743  //
   744  // Float32 implementations are autogenerated and not directly tested.
   745  func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) {
   746  	if ul != blas.Lower && ul != blas.Upper {
   747  		panic(badUplo)
   748  	}
   749  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
   750  		panic(badTranspose)
   751  	}
   752  	if d != blas.NonUnit && d != blas.Unit {
   753  		panic(badDiag)
   754  	}
   755  	if n < 0 {
   756  		panic(nLT0)
   757  	}
   758  	if k < 0 {
   759  		panic(kLT0)
   760  	}
   761  	if lda < k+1 {
   762  		panic(badLdA)
   763  	}
   764  	if incX == 0 {
   765  		panic(zeroIncX)
   766  	}
   767  
   768  	// Quick return if possible.
   769  	if n == 0 {
   770  		return
   771  	}
   772  
   773  	// For zero matrix size the following slice length checks are trivially satisfied.
   774  	if len(a) < lda*(n-1)+k+1 {
   775  		panic(shortA)
   776  	}
   777  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
   778  		panic(shortX)
   779  	}
   780  
   781  	var kx int
   782  	if incX < 0 {
   783  		kx = -(n - 1) * incX
   784  	}
   785  
   786  	nonunit := d != blas.Unit
   787  
   788  	if tA == blas.NoTrans {
   789  		if ul == blas.Upper {
   790  			if incX == 1 {
   791  				for i := 0; i < n; i++ {
   792  					u := min(1+k, n-i)
   793  					var sum float32
   794  					atmp := a[i*lda:]
   795  					xtmp := x[i:]
   796  					for j := 1; j < u; j++ {
   797  						sum += xtmp[j] * atmp[j]
   798  					}
   799  					if nonunit {
   800  						sum += xtmp[0] * atmp[0]
   801  					} else {
   802  						sum += xtmp[0]
   803  					}
   804  					x[i] = sum
   805  				}
   806  				return
   807  			}
   808  			ix := kx
   809  			for i := 0; i < n; i++ {
   810  				u := min(1+k, n-i)
   811  				var sum float32
   812  				atmp := a[i*lda:]
   813  				jx := incX
   814  				for j := 1; j < u; j++ {
   815  					sum += x[ix+jx] * atmp[j]
   816  					jx += incX
   817  				}
   818  				if nonunit {
   819  					sum += x[ix] * atmp[0]
   820  				} else {
   821  					sum += x[ix]
   822  				}
   823  				x[ix] = sum
   824  				ix += incX
   825  			}
   826  			return
   827  		}
   828  		if incX == 1 {
   829  			for i := n - 1; i >= 0; i-- {
   830  				l := max(0, k-i)
   831  				atmp := a[i*lda:]
   832  				var sum float32
   833  				for j := l; j < k; j++ {
   834  					sum += x[i-k+j] * atmp[j]
   835  				}
   836  				if nonunit {
   837  					sum += x[i] * atmp[k]
   838  				} else {
   839  					sum += x[i]
   840  				}
   841  				x[i] = sum
   842  			}
   843  			return
   844  		}
   845  		ix := kx + (n-1)*incX
   846  		for i := n - 1; i >= 0; i-- {
   847  			l := max(0, k-i)
   848  			atmp := a[i*lda:]
   849  			var sum float32
   850  			jx := l * incX
   851  			for j := l; j < k; j++ {
   852  				sum += x[ix-k*incX+jx] * atmp[j]
   853  				jx += incX
   854  			}
   855  			if nonunit {
   856  				sum += x[ix] * atmp[k]
   857  			} else {
   858  				sum += x[ix]
   859  			}
   860  			x[ix] = sum
   861  			ix -= incX
   862  		}
   863  		return
   864  	}
   865  	if ul == blas.Upper {
   866  		if incX == 1 {
   867  			for i := n - 1; i >= 0; i-- {
   868  				u := k + 1
   869  				if i < u {
   870  					u = i + 1
   871  				}
   872  				var sum float32
   873  				for j := 1; j < u; j++ {
   874  					sum += x[i-j] * a[(i-j)*lda+j]
   875  				}
   876  				if nonunit {
   877  					sum += x[i] * a[i*lda]
   878  				} else {
   879  					sum += x[i]
   880  				}
   881  				x[i] = sum
   882  			}
   883  			return
   884  		}
   885  		ix := kx + (n-1)*incX
   886  		for i := n - 1; i >= 0; i-- {
   887  			u := k + 1
   888  			if i < u {
   889  				u = i + 1
   890  			}
   891  			var sum float32
   892  			jx := incX
   893  			for j := 1; j < u; j++ {
   894  				sum += x[ix-jx] * a[(i-j)*lda+j]
   895  				jx += incX
   896  			}
   897  			if nonunit {
   898  				sum += x[ix] * a[i*lda]
   899  			} else {
   900  				sum += x[ix]
   901  			}
   902  			x[ix] = sum
   903  			ix -= incX
   904  		}
   905  		return
   906  	}
   907  	if incX == 1 {
   908  		for i := 0; i < n; i++ {
   909  			u := k
   910  			if i+k >= n {
   911  				u = n - i - 1
   912  			}
   913  			var sum float32
   914  			for j := 0; j < u; j++ {
   915  				sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1]
   916  			}
   917  			if nonunit {
   918  				sum += x[i] * a[i*lda+k]
   919  			} else {
   920  				sum += x[i]
   921  			}
   922  			x[i] = sum
   923  		}
   924  		return
   925  	}
   926  	ix := kx
   927  	for i := 0; i < n; i++ {
   928  		u := k
   929  		if i+k >= n {
   930  			u = n - i - 1
   931  		}
   932  		var (
   933  			sum float32
   934  			jx  int
   935  		)
   936  		for j := 0; j < u; j++ {
   937  			sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
   938  			jx += incX
   939  		}
   940  		if nonunit {
   941  			sum += x[ix] * a[i*lda+k]
   942  		} else {
   943  			sum += x[ix]
   944  		}
   945  		x[ix] = sum
   946  		ix += incX
   947  	}
   948  }
   949  
   950  // Stpmv performs one of the matrix-vector operations
   951  //  x = A * x   if tA == blas.NoTrans
   952  //  x = Aᵀ * x  if tA == blas.Trans or blas.ConjTrans
   953  // where A is an n×n triangular matrix in packed format, and x is a vector.
   954  //
   955  // Float32 implementations are autogenerated and not directly tested.
   956  func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) {
   957  	if ul != blas.Lower && ul != blas.Upper {
   958  		panic(badUplo)
   959  	}
   960  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
   961  		panic(badTranspose)
   962  	}
   963  	if d != blas.NonUnit && d != blas.Unit {
   964  		panic(badDiag)
   965  	}
   966  	if n < 0 {
   967  		panic(nLT0)
   968  	}
   969  	if incX == 0 {
   970  		panic(zeroIncX)
   971  	}
   972  
   973  	// Quick return if possible.
   974  	if n == 0 {
   975  		return
   976  	}
   977  
   978  	// For zero matrix size the following slice length checks are trivially satisfied.
   979  	if len(ap) < n*(n+1)/2 {
   980  		panic(shortAP)
   981  	}
   982  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
   983  		panic(shortX)
   984  	}
   985  
   986  	var kx int
   987  	if incX < 0 {
   988  		kx = -(n - 1) * incX
   989  	}
   990  
   991  	nonUnit := d == blas.NonUnit
   992  	var offset int // Offset is the index of (i,i)
   993  	if tA == blas.NoTrans {
   994  		if ul == blas.Upper {
   995  			if incX == 1 {
   996  				for i := 0; i < n; i++ {
   997  					xi := x[i]
   998  					if nonUnit {
   999  						xi *= ap[offset]
  1000  					}
  1001  					atmp := ap[offset+1 : offset+n-i]
  1002  					xtmp := x[i+1:]
  1003  					for j, v := range atmp {
  1004  						xi += v * xtmp[j]
  1005  					}
  1006  					x[i] = xi
  1007  					offset += n - i
  1008  				}
  1009  				return
  1010  			}
  1011  			ix := kx
  1012  			for i := 0; i < n; i++ {
  1013  				xix := x[ix]
  1014  				if nonUnit {
  1015  					xix *= ap[offset]
  1016  				}
  1017  				atmp := ap[offset+1 : offset+n-i]
  1018  				jx := kx + (i+1)*incX
  1019  				for _, v := range atmp {
  1020  					xix += v * x[jx]
  1021  					jx += incX
  1022  				}
  1023  				x[ix] = xix
  1024  				offset += n - i
  1025  				ix += incX
  1026  			}
  1027  			return
  1028  		}
  1029  		if incX == 1 {
  1030  			offset = n*(n+1)/2 - 1
  1031  			for i := n - 1; i >= 0; i-- {
  1032  				xi := x[i]
  1033  				if nonUnit {
  1034  					xi *= ap[offset]
  1035  				}
  1036  				atmp := ap[offset-i : offset]
  1037  				for j, v := range atmp {
  1038  					xi += v * x[j]
  1039  				}
  1040  				x[i] = xi
  1041  				offset -= i + 1
  1042  			}
  1043  			return
  1044  		}
  1045  		ix := kx + (n-1)*incX
  1046  		offset = n*(n+1)/2 - 1
  1047  		for i := n - 1; i >= 0; i-- {
  1048  			xix := x[ix]
  1049  			if nonUnit {
  1050  				xix *= ap[offset]
  1051  			}
  1052  			atmp := ap[offset-i : offset]
  1053  			jx := kx
  1054  			for _, v := range atmp {
  1055  				xix += v * x[jx]
  1056  				jx += incX
  1057  			}
  1058  			x[ix] = xix
  1059  			offset -= i + 1
  1060  			ix -= incX
  1061  		}
  1062  		return
  1063  	}
  1064  	// Cases where ap is transposed.
  1065  	if ul == blas.Upper {
  1066  		if incX == 1 {
  1067  			offset = n*(n+1)/2 - 1
  1068  			for i := n - 1; i >= 0; i-- {
  1069  				xi := x[i]
  1070  				atmp := ap[offset+1 : offset+n-i]
  1071  				xtmp := x[i+1:]
  1072  				for j, v := range atmp {
  1073  					xtmp[j] += v * xi
  1074  				}
  1075  				if nonUnit {
  1076  					x[i] *= ap[offset]
  1077  				}
  1078  				offset -= n - i + 1
  1079  			}
  1080  			return
  1081  		}
  1082  		ix := kx + (n-1)*incX
  1083  		offset = n*(n+1)/2 - 1
  1084  		for i := n - 1; i >= 0; i-- {
  1085  			xix := x[ix]
  1086  			jx := kx + (i+1)*incX
  1087  			atmp := ap[offset+1 : offset+n-i]
  1088  			for _, v := range atmp {
  1089  				x[jx] += v * xix
  1090  				jx += incX
  1091  			}
  1092  			if nonUnit {
  1093  				x[ix] *= ap[offset]
  1094  			}
  1095  			offset -= n - i + 1
  1096  			ix -= incX
  1097  		}
  1098  		return
  1099  	}
  1100  	if incX == 1 {
  1101  		for i := 0; i < n; i++ {
  1102  			xi := x[i]
  1103  			atmp := ap[offset-i : offset]
  1104  			for j, v := range atmp {
  1105  				x[j] += v * xi
  1106  			}
  1107  			if nonUnit {
  1108  				x[i] *= ap[offset]
  1109  			}
  1110  			offset += i + 2
  1111  		}
  1112  		return
  1113  	}
  1114  	ix := kx
  1115  	for i := 0; i < n; i++ {
  1116  		xix := x[ix]
  1117  		jx := kx
  1118  		atmp := ap[offset-i : offset]
  1119  		for _, v := range atmp {
  1120  			x[jx] += v * xix
  1121  			jx += incX
  1122  		}
  1123  		if nonUnit {
  1124  			x[ix] *= ap[offset]
  1125  		}
  1126  		ix += incX
  1127  		offset += i + 2
  1128  	}
  1129  }
  1130  
  1131  // Stbsv solves one of the systems of equations
  1132  //  A * x = b   if tA == blas.NoTrans
  1133  //  Aᵀ * x = b  if tA == blas.Trans or tA == blas.ConjTrans
  1134  // where A is an n×n triangular band matrix with k+1 diagonals,
  1135  // and x and b are vectors.
  1136  //
  1137  // At entry to the function, x contains the values of b, and the result is
  1138  // stored in-place into x.
  1139  //
  1140  // No test for singularity or near-singularity is included in this
  1141  // routine. Such tests must be performed before calling this routine.
  1142  //
  1143  // Float32 implementations are autogenerated and not directly tested.
  1144  func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) {
  1145  	if ul != blas.Lower && ul != blas.Upper {
  1146  		panic(badUplo)
  1147  	}
  1148  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  1149  		panic(badTranspose)
  1150  	}
  1151  	if d != blas.NonUnit && d != blas.Unit {
  1152  		panic(badDiag)
  1153  	}
  1154  	if n < 0 {
  1155  		panic(nLT0)
  1156  	}
  1157  	if k < 0 {
  1158  		panic(kLT0)
  1159  	}
  1160  	if lda < k+1 {
  1161  		panic(badLdA)
  1162  	}
  1163  	if incX == 0 {
  1164  		panic(zeroIncX)
  1165  	}
  1166  
  1167  	// Quick return if possible.
  1168  	if n == 0 {
  1169  		return
  1170  	}
  1171  
  1172  	// For zero matrix size the following slice length checks are trivially satisfied.
  1173  	if len(a) < lda*(n-1)+k+1 {
  1174  		panic(shortA)
  1175  	}
  1176  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1177  		panic(shortX)
  1178  	}
  1179  
  1180  	var kx int
  1181  	if incX < 0 {
  1182  		kx = -(n - 1) * incX
  1183  	}
  1184  	nonUnit := d == blas.NonUnit
  1185  	// Form x = A^-1 x.
  1186  	// Several cases below use subslices for speed improvement.
  1187  	// The incX != 1 cases usually do not because incX may be negative.
  1188  	if tA == blas.NoTrans {
  1189  		if ul == blas.Upper {
  1190  			if incX == 1 {
  1191  				for i := n - 1; i >= 0; i-- {
  1192  					bands := k
  1193  					if i+bands >= n {
  1194  						bands = n - i - 1
  1195  					}
  1196  					atmp := a[i*lda+1:]
  1197  					xtmp := x[i+1 : i+bands+1]
  1198  					var sum float32
  1199  					for j, v := range xtmp {
  1200  						sum += v * atmp[j]
  1201  					}
  1202  					x[i] -= sum
  1203  					if nonUnit {
  1204  						x[i] /= a[i*lda]
  1205  					}
  1206  				}
  1207  				return
  1208  			}
  1209  			ix := kx + (n-1)*incX
  1210  			for i := n - 1; i >= 0; i-- {
  1211  				max := k + 1
  1212  				if i+max > n {
  1213  					max = n - i
  1214  				}
  1215  				atmp := a[i*lda:]
  1216  				var (
  1217  					jx  int
  1218  					sum float32
  1219  				)
  1220  				for j := 1; j < max; j++ {
  1221  					jx += incX
  1222  					sum += x[ix+jx] * atmp[j]
  1223  				}
  1224  				x[ix] -= sum
  1225  				if nonUnit {
  1226  					x[ix] /= atmp[0]
  1227  				}
  1228  				ix -= incX
  1229  			}
  1230  			return
  1231  		}
  1232  		if incX == 1 {
  1233  			for i := 0; i < n; i++ {
  1234  				bands := k
  1235  				if i-k < 0 {
  1236  					bands = i
  1237  				}
  1238  				atmp := a[i*lda+k-bands:]
  1239  				xtmp := x[i-bands : i]
  1240  				var sum float32
  1241  				for j, v := range xtmp {
  1242  					sum += v * atmp[j]
  1243  				}
  1244  				x[i] -= sum
  1245  				if nonUnit {
  1246  					x[i] /= atmp[bands]
  1247  				}
  1248  			}
  1249  			return
  1250  		}
  1251  		ix := kx
  1252  		for i := 0; i < n; i++ {
  1253  			bands := k
  1254  			if i-k < 0 {
  1255  				bands = i
  1256  			}
  1257  			atmp := a[i*lda+k-bands:]
  1258  			var (
  1259  				sum float32
  1260  				jx  int
  1261  			)
  1262  			for j := 0; j < bands; j++ {
  1263  				sum += x[ix-bands*incX+jx] * atmp[j]
  1264  				jx += incX
  1265  			}
  1266  			x[ix] -= sum
  1267  			if nonUnit {
  1268  				x[ix] /= atmp[bands]
  1269  			}
  1270  			ix += incX
  1271  		}
  1272  		return
  1273  	}
  1274  	// Cases where a is transposed.
  1275  	if ul == blas.Upper {
  1276  		if incX == 1 {
  1277  			for i := 0; i < n; i++ {
  1278  				bands := k
  1279  				if i-k < 0 {
  1280  					bands = i
  1281  				}
  1282  				var sum float32
  1283  				for j := 0; j < bands; j++ {
  1284  					sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j]
  1285  				}
  1286  				x[i] -= sum
  1287  				if nonUnit {
  1288  					x[i] /= a[i*lda]
  1289  				}
  1290  			}
  1291  			return
  1292  		}
  1293  		ix := kx
  1294  		for i := 0; i < n; i++ {
  1295  			bands := k
  1296  			if i-k < 0 {
  1297  				bands = i
  1298  			}
  1299  			var (
  1300  				sum float32
  1301  				jx  int
  1302  			)
  1303  			for j := 0; j < bands; j++ {
  1304  				sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j]
  1305  				jx += incX
  1306  			}
  1307  			x[ix] -= sum
  1308  			if nonUnit {
  1309  				x[ix] /= a[i*lda]
  1310  			}
  1311  			ix += incX
  1312  		}
  1313  		return
  1314  	}
  1315  	if incX == 1 {
  1316  		for i := n - 1; i >= 0; i-- {
  1317  			bands := k
  1318  			if i+bands >= n {
  1319  				bands = n - i - 1
  1320  			}
  1321  			var sum float32
  1322  			xtmp := x[i+1 : i+1+bands]
  1323  			for j, v := range xtmp {
  1324  				sum += v * a[(i+j+1)*lda+k-j-1]
  1325  			}
  1326  			x[i] -= sum
  1327  			if nonUnit {
  1328  				x[i] /= a[i*lda+k]
  1329  			}
  1330  		}
  1331  		return
  1332  	}
  1333  	ix := kx + (n-1)*incX
  1334  	for i := n - 1; i >= 0; i-- {
  1335  		bands := k
  1336  		if i+bands >= n {
  1337  			bands = n - i - 1
  1338  		}
  1339  		var (
  1340  			sum float32
  1341  			jx  int
  1342  		)
  1343  		for j := 0; j < bands; j++ {
  1344  			sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1]
  1345  			jx += incX
  1346  		}
  1347  		x[ix] -= sum
  1348  		if nonUnit {
  1349  			x[ix] /= a[i*lda+k]
  1350  		}
  1351  		ix -= incX
  1352  	}
  1353  }
  1354  
  1355  // Ssbmv performs the matrix-vector operation
  1356  //  y = alpha * A * x + beta * y
  1357  // where A is an n×n symmetric band matrix with k super-diagonals, x and y are
  1358  // vectors, and alpha and beta are scalars.
  1359  //
  1360  // Float32 implementations are autogenerated and not directly tested.
  1361  func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) {
  1362  	if ul != blas.Lower && ul != blas.Upper {
  1363  		panic(badUplo)
  1364  	}
  1365  	if n < 0 {
  1366  		panic(nLT0)
  1367  	}
  1368  	if k < 0 {
  1369  		panic(kLT0)
  1370  	}
  1371  	if lda < k+1 {
  1372  		panic(badLdA)
  1373  	}
  1374  	if incX == 0 {
  1375  		panic(zeroIncX)
  1376  	}
  1377  	if incY == 0 {
  1378  		panic(zeroIncY)
  1379  	}
  1380  
  1381  	// Quick return if possible.
  1382  	if n == 0 {
  1383  		return
  1384  	}
  1385  
  1386  	// For zero matrix size the following slice length checks are trivially satisfied.
  1387  	if len(a) < lda*(n-1)+k+1 {
  1388  		panic(shortA)
  1389  	}
  1390  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1391  		panic(shortX)
  1392  	}
  1393  	if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  1394  		panic(shortY)
  1395  	}
  1396  
  1397  	// Quick return if possible.
  1398  	if alpha == 0 && beta == 1 {
  1399  		return
  1400  	}
  1401  
  1402  	// Set up indexes
  1403  	lenX := n
  1404  	lenY := n
  1405  	var kx, ky int
  1406  	if incX < 0 {
  1407  		kx = -(lenX - 1) * incX
  1408  	}
  1409  	if incY < 0 {
  1410  		ky = -(lenY - 1) * incY
  1411  	}
  1412  
  1413  	// Form y = beta * y.
  1414  	if beta != 1 {
  1415  		if incY == 1 {
  1416  			if beta == 0 {
  1417  				for i := range y[:n] {
  1418  					y[i] = 0
  1419  				}
  1420  			} else {
  1421  				f32.ScalUnitary(beta, y[:n])
  1422  			}
  1423  		} else {
  1424  			iy := ky
  1425  			if beta == 0 {
  1426  				for i := 0; i < n; i++ {
  1427  					y[iy] = 0
  1428  					iy += incY
  1429  				}
  1430  			} else {
  1431  				if incY > 0 {
  1432  					f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
  1433  				} else {
  1434  					f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
  1435  				}
  1436  			}
  1437  		}
  1438  	}
  1439  
  1440  	if alpha == 0 {
  1441  		return
  1442  	}
  1443  
  1444  	if ul == blas.Upper {
  1445  		if incX == 1 {
  1446  			iy := ky
  1447  			for i := 0; i < n; i++ {
  1448  				atmp := a[i*lda:]
  1449  				tmp := alpha * x[i]
  1450  				sum := tmp * atmp[0]
  1451  				u := min(k, n-i-1)
  1452  				jy := incY
  1453  				for j := 1; j <= u; j++ {
  1454  					v := atmp[j]
  1455  					sum += alpha * x[i+j] * v
  1456  					y[iy+jy] += tmp * v
  1457  					jy += incY
  1458  				}
  1459  				y[iy] += sum
  1460  				iy += incY
  1461  			}
  1462  			return
  1463  		}
  1464  		ix := kx
  1465  		iy := ky
  1466  		for i := 0; i < n; i++ {
  1467  			atmp := a[i*lda:]
  1468  			tmp := alpha * x[ix]
  1469  			sum := tmp * atmp[0]
  1470  			u := min(k, n-i-1)
  1471  			jx := incX
  1472  			jy := incY
  1473  			for j := 1; j <= u; j++ {
  1474  				v := atmp[j]
  1475  				sum += alpha * x[ix+jx] * v
  1476  				y[iy+jy] += tmp * v
  1477  				jx += incX
  1478  				jy += incY
  1479  			}
  1480  			y[iy] += sum
  1481  			ix += incX
  1482  			iy += incY
  1483  		}
  1484  		return
  1485  	}
  1486  
  1487  	// Casses where a has bands below the diagonal.
  1488  	if incX == 1 {
  1489  		iy := ky
  1490  		for i := 0; i < n; i++ {
  1491  			l := max(0, k-i)
  1492  			tmp := alpha * x[i]
  1493  			jy := l * incY
  1494  			atmp := a[i*lda:]
  1495  			for j := l; j < k; j++ {
  1496  				v := atmp[j]
  1497  				y[iy] += alpha * v * x[i-k+j]
  1498  				y[iy-k*incY+jy] += tmp * v
  1499  				jy += incY
  1500  			}
  1501  			y[iy] += tmp * atmp[k]
  1502  			iy += incY
  1503  		}
  1504  		return
  1505  	}
  1506  	ix := kx
  1507  	iy := ky
  1508  	for i := 0; i < n; i++ {
  1509  		l := max(0, k-i)
  1510  		tmp := alpha * x[ix]
  1511  		jx := l * incX
  1512  		jy := l * incY
  1513  		atmp := a[i*lda:]
  1514  		for j := l; j < k; j++ {
  1515  			v := atmp[j]
  1516  			y[iy] += alpha * v * x[ix-k*incX+jx]
  1517  			y[iy-k*incY+jy] += tmp * v
  1518  			jx += incX
  1519  			jy += incY
  1520  		}
  1521  		y[iy] += tmp * atmp[k]
  1522  		ix += incX
  1523  		iy += incY
  1524  	}
  1525  }
  1526  
  1527  // Ssyr performs the symmetric rank-one update
  1528  //  A += alpha * x * xᵀ
  1529  // where A is an n×n symmetric matrix, and x is a vector.
  1530  //
  1531  // Float32 implementations are autogenerated and not directly tested.
  1532  func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) {
  1533  	if ul != blas.Lower && ul != blas.Upper {
  1534  		panic(badUplo)
  1535  	}
  1536  	if n < 0 {
  1537  		panic(nLT0)
  1538  	}
  1539  	if lda < max(1, n) {
  1540  		panic(badLdA)
  1541  	}
  1542  	if incX == 0 {
  1543  		panic(zeroIncX)
  1544  	}
  1545  
  1546  	// Quick return if possible.
  1547  	if n == 0 {
  1548  		return
  1549  	}
  1550  
  1551  	// For zero matrix size the following slice length checks are trivially satisfied.
  1552  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1553  		panic(shortX)
  1554  	}
  1555  	if len(a) < lda*(n-1)+n {
  1556  		panic(shortA)
  1557  	}
  1558  
  1559  	// Quick return if possible.
  1560  	if alpha == 0 {
  1561  		return
  1562  	}
  1563  
  1564  	lenX := n
  1565  	var kx int
  1566  	if incX < 0 {
  1567  		kx = -(lenX - 1) * incX
  1568  	}
  1569  	if ul == blas.Upper {
  1570  		if incX == 1 {
  1571  			for i := 0; i < n; i++ {
  1572  				tmp := x[i] * alpha
  1573  				if tmp != 0 {
  1574  					atmp := a[i*lda+i : i*lda+n]
  1575  					xtmp := x[i:n]
  1576  					for j, v := range xtmp {
  1577  						atmp[j] += v * tmp
  1578  					}
  1579  				}
  1580  			}
  1581  			return
  1582  		}
  1583  		ix := kx
  1584  		for i := 0; i < n; i++ {
  1585  			tmp := x[ix] * alpha
  1586  			if tmp != 0 {
  1587  				jx := ix
  1588  				atmp := a[i*lda:]
  1589  				for j := i; j < n; j++ {
  1590  					atmp[j] += x[jx] * tmp
  1591  					jx += incX
  1592  				}
  1593  			}
  1594  			ix += incX
  1595  		}
  1596  		return
  1597  	}
  1598  	// Cases where a is lower triangular.
  1599  	if incX == 1 {
  1600  		for i := 0; i < n; i++ {
  1601  			tmp := x[i] * alpha
  1602  			if tmp != 0 {
  1603  				atmp := a[i*lda:]
  1604  				xtmp := x[:i+1]
  1605  				for j, v := range xtmp {
  1606  					atmp[j] += tmp * v
  1607  				}
  1608  			}
  1609  		}
  1610  		return
  1611  	}
  1612  	ix := kx
  1613  	for i := 0; i < n; i++ {
  1614  		tmp := x[ix] * alpha
  1615  		if tmp != 0 {
  1616  			atmp := a[i*lda:]
  1617  			jx := kx
  1618  			for j := 0; j < i+1; j++ {
  1619  				atmp[j] += tmp * x[jx]
  1620  				jx += incX
  1621  			}
  1622  		}
  1623  		ix += incX
  1624  	}
  1625  }
  1626  
  1627  // Ssyr2 performs the symmetric rank-two update
  1628  //  A += alpha * x * yᵀ + alpha * y * xᵀ
  1629  // where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar.
  1630  //
  1631  // Float32 implementations are autogenerated and not directly tested.
  1632  func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) {
  1633  	if ul != blas.Lower && ul != blas.Upper {
  1634  		panic(badUplo)
  1635  	}
  1636  	if n < 0 {
  1637  		panic(nLT0)
  1638  	}
  1639  	if lda < max(1, n) {
  1640  		panic(badLdA)
  1641  	}
  1642  	if incX == 0 {
  1643  		panic(zeroIncX)
  1644  	}
  1645  	if incY == 0 {
  1646  		panic(zeroIncY)
  1647  	}
  1648  
  1649  	// Quick return if possible.
  1650  	if n == 0 {
  1651  		return
  1652  	}
  1653  
  1654  	// For zero matrix size the following slice length checks are trivially satisfied.
  1655  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1656  		panic(shortX)
  1657  	}
  1658  	if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  1659  		panic(shortY)
  1660  	}
  1661  	if len(a) < lda*(n-1)+n {
  1662  		panic(shortA)
  1663  	}
  1664  
  1665  	// Quick return if possible.
  1666  	if alpha == 0 {
  1667  		return
  1668  	}
  1669  
  1670  	var ky, kx int
  1671  	if incY < 0 {
  1672  		ky = -(n - 1) * incY
  1673  	}
  1674  	if incX < 0 {
  1675  		kx = -(n - 1) * incX
  1676  	}
  1677  	if ul == blas.Upper {
  1678  		if incX == 1 && incY == 1 {
  1679  			for i := 0; i < n; i++ {
  1680  				xi := x[i]
  1681  				yi := y[i]
  1682  				atmp := a[i*lda:]
  1683  				for j := i; j < n; j++ {
  1684  					atmp[j] += alpha * (xi*y[j] + x[j]*yi)
  1685  				}
  1686  			}
  1687  			return
  1688  		}
  1689  		ix := kx
  1690  		iy := ky
  1691  		for i := 0; i < n; i++ {
  1692  			jx := kx + i*incX
  1693  			jy := ky + i*incY
  1694  			xi := x[ix]
  1695  			yi := y[iy]
  1696  			atmp := a[i*lda:]
  1697  			for j := i; j < n; j++ {
  1698  				atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
  1699  				jx += incX
  1700  				jy += incY
  1701  			}
  1702  			ix += incX
  1703  			iy += incY
  1704  		}
  1705  		return
  1706  	}
  1707  	if incX == 1 && incY == 1 {
  1708  		for i := 0; i < n; i++ {
  1709  			xi := x[i]
  1710  			yi := y[i]
  1711  			atmp := a[i*lda:]
  1712  			for j := 0; j <= i; j++ {
  1713  				atmp[j] += alpha * (xi*y[j] + x[j]*yi)
  1714  			}
  1715  		}
  1716  		return
  1717  	}
  1718  	ix := kx
  1719  	iy := ky
  1720  	for i := 0; i < n; i++ {
  1721  		jx := kx
  1722  		jy := ky
  1723  		xi := x[ix]
  1724  		yi := y[iy]
  1725  		atmp := a[i*lda:]
  1726  		for j := 0; j <= i; j++ {
  1727  			atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
  1728  			jx += incX
  1729  			jy += incY
  1730  		}
  1731  		ix += incX
  1732  		iy += incY
  1733  	}
  1734  }
  1735  
  1736  // Stpsv solves one of the systems of equations
  1737  //  A * x = b   if tA == blas.NoTrans
  1738  //  Aᵀ * x = b  if tA == blas.Trans or blas.ConjTrans
  1739  // where A is an n×n triangular matrix in packed format, and x and b are vectors.
  1740  //
  1741  // At entry to the function, x contains the values of b, and the result is
  1742  // stored in-place into x.
  1743  //
  1744  // No test for singularity or near-singularity is included in this
  1745  // routine. Such tests must be performed before calling this routine.
  1746  //
  1747  // Float32 implementations are autogenerated and not directly tested.
  1748  func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) {
  1749  	if ul != blas.Lower && ul != blas.Upper {
  1750  		panic(badUplo)
  1751  	}
  1752  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
  1753  		panic(badTranspose)
  1754  	}
  1755  	if d != blas.NonUnit && d != blas.Unit {
  1756  		panic(badDiag)
  1757  	}
  1758  	if n < 0 {
  1759  		panic(nLT0)
  1760  	}
  1761  	if incX == 0 {
  1762  		panic(zeroIncX)
  1763  	}
  1764  
  1765  	// Quick return if possible.
  1766  	if n == 0 {
  1767  		return
  1768  	}
  1769  
  1770  	// For zero matrix size the following slice length checks are trivially satisfied.
  1771  	if len(ap) < n*(n+1)/2 {
  1772  		panic(shortAP)
  1773  	}
  1774  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1775  		panic(shortX)
  1776  	}
  1777  
  1778  	var kx int
  1779  	if incX < 0 {
  1780  		kx = -(n - 1) * incX
  1781  	}
  1782  
  1783  	nonUnit := d == blas.NonUnit
  1784  	var offset int // Offset is the index of (i,i)
  1785  	if tA == blas.NoTrans {
  1786  		if ul == blas.Upper {
  1787  			offset = n*(n+1)/2 - 1
  1788  			if incX == 1 {
  1789  				for i := n - 1; i >= 0; i-- {
  1790  					atmp := ap[offset+1 : offset+n-i]
  1791  					xtmp := x[i+1:]
  1792  					var sum float32
  1793  					for j, v := range atmp {
  1794  						sum += v * xtmp[j]
  1795  					}
  1796  					x[i] -= sum
  1797  					if nonUnit {
  1798  						x[i] /= ap[offset]
  1799  					}
  1800  					offset -= n - i + 1
  1801  				}
  1802  				return
  1803  			}
  1804  			ix := kx + (n-1)*incX
  1805  			for i := n - 1; i >= 0; i-- {
  1806  				atmp := ap[offset+1 : offset+n-i]
  1807  				jx := kx + (i+1)*incX
  1808  				var sum float32
  1809  				for _, v := range atmp {
  1810  					sum += v * x[jx]
  1811  					jx += incX
  1812  				}
  1813  				x[ix] -= sum
  1814  				if nonUnit {
  1815  					x[ix] /= ap[offset]
  1816  				}
  1817  				ix -= incX
  1818  				offset -= n - i + 1
  1819  			}
  1820  			return
  1821  		}
  1822  		if incX == 1 {
  1823  			for i := 0; i < n; i++ {
  1824  				atmp := ap[offset-i : offset]
  1825  				var sum float32
  1826  				for j, v := range atmp {
  1827  					sum += v * x[j]
  1828  				}
  1829  				x[i] -= sum
  1830  				if nonUnit {
  1831  					x[i] /= ap[offset]
  1832  				}
  1833  				offset += i + 2
  1834  			}
  1835  			return
  1836  		}
  1837  		ix := kx
  1838  		for i := 0; i < n; i++ {
  1839  			jx := kx
  1840  			atmp := ap[offset-i : offset]
  1841  			var sum float32
  1842  			for _, v := range atmp {
  1843  				sum += v * x[jx]
  1844  				jx += incX
  1845  			}
  1846  			x[ix] -= sum
  1847  			if nonUnit {
  1848  				x[ix] /= ap[offset]
  1849  			}
  1850  			ix += incX
  1851  			offset += i + 2
  1852  		}
  1853  		return
  1854  	}
  1855  	// Cases where ap is transposed.
  1856  	if ul == blas.Upper {
  1857  		if incX == 1 {
  1858  			for i := 0; i < n; i++ {
  1859  				if nonUnit {
  1860  					x[i] /= ap[offset]
  1861  				}
  1862  				xi := x[i]
  1863  				atmp := ap[offset+1 : offset+n-i]
  1864  				xtmp := x[i+1:]
  1865  				for j, v := range atmp {
  1866  					xtmp[j] -= v * xi
  1867  				}
  1868  				offset += n - i
  1869  			}
  1870  			return
  1871  		}
  1872  		ix := kx
  1873  		for i := 0; i < n; i++ {
  1874  			if nonUnit {
  1875  				x[ix] /= ap[offset]
  1876  			}
  1877  			xix := x[ix]
  1878  			atmp := ap[offset+1 : offset+n-i]
  1879  			jx := kx + (i+1)*incX
  1880  			for _, v := range atmp {
  1881  				x[jx] -= v * xix
  1882  				jx += incX
  1883  			}
  1884  			ix += incX
  1885  			offset += n - i
  1886  		}
  1887  		return
  1888  	}
  1889  	if incX == 1 {
  1890  		offset = n*(n+1)/2 - 1
  1891  		for i := n - 1; i >= 0; i-- {
  1892  			if nonUnit {
  1893  				x[i] /= ap[offset]
  1894  			}
  1895  			xi := x[i]
  1896  			atmp := ap[offset-i : offset]
  1897  			for j, v := range atmp {
  1898  				x[j] -= v * xi
  1899  			}
  1900  			offset -= i + 1
  1901  		}
  1902  		return
  1903  	}
  1904  	ix := kx + (n-1)*incX
  1905  	offset = n*(n+1)/2 - 1
  1906  	for i := n - 1; i >= 0; i-- {
  1907  		if nonUnit {
  1908  			x[ix] /= ap[offset]
  1909  		}
  1910  		xix := x[ix]
  1911  		atmp := ap[offset-i : offset]
  1912  		jx := kx
  1913  		for _, v := range atmp {
  1914  			x[jx] -= v * xix
  1915  			jx += incX
  1916  		}
  1917  		ix -= incX
  1918  		offset -= i + 1
  1919  	}
  1920  }
  1921  
  1922  // Sspmv performs the matrix-vector operation
  1923  //  y = alpha * A * x + beta * y
  1924  // where A is an n×n symmetric matrix in packed format, x and y are vectors,
  1925  // and alpha and beta are scalars.
  1926  //
  1927  // Float32 implementations are autogenerated and not directly tested.
  1928  func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) {
  1929  	if ul != blas.Lower && ul != blas.Upper {
  1930  		panic(badUplo)
  1931  	}
  1932  	if n < 0 {
  1933  		panic(nLT0)
  1934  	}
  1935  	if incX == 0 {
  1936  		panic(zeroIncX)
  1937  	}
  1938  	if incY == 0 {
  1939  		panic(zeroIncY)
  1940  	}
  1941  
  1942  	// Quick return if possible.
  1943  	if n == 0 {
  1944  		return
  1945  	}
  1946  
  1947  	// For zero matrix size the following slice length checks are trivially satisfied.
  1948  	if len(ap) < n*(n+1)/2 {
  1949  		panic(shortAP)
  1950  	}
  1951  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  1952  		panic(shortX)
  1953  	}
  1954  	if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  1955  		panic(shortY)
  1956  	}
  1957  
  1958  	// Quick return if possible.
  1959  	if alpha == 0 && beta == 1 {
  1960  		return
  1961  	}
  1962  
  1963  	// Set up start points
  1964  	var kx, ky int
  1965  	if incX < 0 {
  1966  		kx = -(n - 1) * incX
  1967  	}
  1968  	if incY < 0 {
  1969  		ky = -(n - 1) * incY
  1970  	}
  1971  
  1972  	// Form y = beta * y.
  1973  	if beta != 1 {
  1974  		if incY == 1 {
  1975  			if beta == 0 {
  1976  				for i := range y[:n] {
  1977  					y[i] = 0
  1978  				}
  1979  			} else {
  1980  				f32.ScalUnitary(beta, y[:n])
  1981  			}
  1982  		} else {
  1983  			iy := ky
  1984  			if beta == 0 {
  1985  				for i := 0; i < n; i++ {
  1986  					y[iy] = 0
  1987  					iy += incY
  1988  				}
  1989  			} else {
  1990  				if incY > 0 {
  1991  					f32.ScalInc(beta, y, uintptr(n), uintptr(incY))
  1992  				} else {
  1993  					f32.ScalInc(beta, y, uintptr(n), uintptr(-incY))
  1994  				}
  1995  			}
  1996  		}
  1997  	}
  1998  
  1999  	if alpha == 0 {
  2000  		return
  2001  	}
  2002  
  2003  	if n == 1 {
  2004  		y[0] += alpha * ap[0] * x[0]
  2005  		return
  2006  	}
  2007  	var offset int // Offset is the index of (i,i).
  2008  	if ul == blas.Upper {
  2009  		if incX == 1 {
  2010  			iy := ky
  2011  			for i := 0; i < n; i++ {
  2012  				xv := x[i] * alpha
  2013  				sum := ap[offset] * x[i]
  2014  				atmp := ap[offset+1 : offset+n-i]
  2015  				xtmp := x[i+1:]
  2016  				jy := ky + (i+1)*incY
  2017  				for j, v := range atmp {
  2018  					sum += v * xtmp[j]
  2019  					y[jy] += v * xv
  2020  					jy += incY
  2021  				}
  2022  				y[iy] += alpha * sum
  2023  				iy += incY
  2024  				offset += n - i
  2025  			}
  2026  			return
  2027  		}
  2028  		ix := kx
  2029  		iy := ky
  2030  		for i := 0; i < n; i++ {
  2031  			xv := x[ix] * alpha
  2032  			sum := ap[offset] * x[ix]
  2033  			atmp := ap[offset+1 : offset+n-i]
  2034  			jx := kx + (i+1)*incX
  2035  			jy := ky + (i+1)*incY
  2036  			for _, v := range atmp {
  2037  				sum += v * x[jx]
  2038  				y[jy] += v * xv
  2039  				jx += incX
  2040  				jy += incY
  2041  			}
  2042  			y[iy] += alpha * sum
  2043  			ix += incX
  2044  			iy += incY
  2045  			offset += n - i
  2046  		}
  2047  		return
  2048  	}
  2049  	if incX == 1 {
  2050  		iy := ky
  2051  		for i := 0; i < n; i++ {
  2052  			xv := x[i] * alpha
  2053  			atmp := ap[offset-i : offset]
  2054  			jy := ky
  2055  			var sum float32
  2056  			for j, v := range atmp {
  2057  				sum += v * x[j]
  2058  				y[jy] += v * xv
  2059  				jy += incY
  2060  			}
  2061  			sum += ap[offset] * x[i]
  2062  			y[iy] += alpha * sum
  2063  			iy += incY
  2064  			offset += i + 2
  2065  		}
  2066  		return
  2067  	}
  2068  	ix := kx
  2069  	iy := ky
  2070  	for i := 0; i < n; i++ {
  2071  		xv := x[ix] * alpha
  2072  		atmp := ap[offset-i : offset]
  2073  		jx := kx
  2074  		jy := ky
  2075  		var sum float32
  2076  		for _, v := range atmp {
  2077  			sum += v * x[jx]
  2078  			y[jy] += v * xv
  2079  			jx += incX
  2080  			jy += incY
  2081  		}
  2082  
  2083  		sum += ap[offset] * x[ix]
  2084  		y[iy] += alpha * sum
  2085  		ix += incX
  2086  		iy += incY
  2087  		offset += i + 2
  2088  	}
  2089  }
  2090  
  2091  // Sspr performs the symmetric rank-one operation
  2092  //  A += alpha * x * xᵀ
  2093  // where A is an n×n symmetric matrix in packed format, x is a vector, and
  2094  // alpha is a scalar.
  2095  //
  2096  // Float32 implementations are autogenerated and not directly tested.
  2097  func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, ap []float32) {
  2098  	if ul != blas.Lower && ul != blas.Upper {
  2099  		panic(badUplo)
  2100  	}
  2101  	if n < 0 {
  2102  		panic(nLT0)
  2103  	}
  2104  	if incX == 0 {
  2105  		panic(zeroIncX)
  2106  	}
  2107  
  2108  	// Quick return if possible.
  2109  	if n == 0 {
  2110  		return
  2111  	}
  2112  
  2113  	// For zero matrix size the following slice length checks are trivially satisfied.
  2114  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  2115  		panic(shortX)
  2116  	}
  2117  	if len(ap) < n*(n+1)/2 {
  2118  		panic(shortAP)
  2119  	}
  2120  
  2121  	// Quick return if possible.
  2122  	if alpha == 0 {
  2123  		return
  2124  	}
  2125  
  2126  	lenX := n
  2127  	var kx int
  2128  	if incX < 0 {
  2129  		kx = -(lenX - 1) * incX
  2130  	}
  2131  	var offset int // Offset is the index of (i,i).
  2132  	if ul == blas.Upper {
  2133  		if incX == 1 {
  2134  			for i := 0; i < n; i++ {
  2135  				atmp := ap[offset:]
  2136  				xv := alpha * x[i]
  2137  				xtmp := x[i:n]
  2138  				for j, v := range xtmp {
  2139  					atmp[j] += xv * v
  2140  				}
  2141  				offset += n - i
  2142  			}
  2143  			return
  2144  		}
  2145  		ix := kx
  2146  		for i := 0; i < n; i++ {
  2147  			jx := kx + i*incX
  2148  			atmp := ap[offset:]
  2149  			xv := alpha * x[ix]
  2150  			for j := 0; j < n-i; j++ {
  2151  				atmp[j] += xv * x[jx]
  2152  				jx += incX
  2153  			}
  2154  			ix += incX
  2155  			offset += n - i
  2156  		}
  2157  		return
  2158  	}
  2159  	if incX == 1 {
  2160  		for i := 0; i < n; i++ {
  2161  			atmp := ap[offset-i:]
  2162  			xv := alpha * x[i]
  2163  			xtmp := x[:i+1]
  2164  			for j, v := range xtmp {
  2165  				atmp[j] += xv * v
  2166  			}
  2167  			offset += i + 2
  2168  		}
  2169  		return
  2170  	}
  2171  	ix := kx
  2172  	for i := 0; i < n; i++ {
  2173  		jx := kx
  2174  		atmp := ap[offset-i:]
  2175  		xv := alpha * x[ix]
  2176  		for j := 0; j <= i; j++ {
  2177  			atmp[j] += xv * x[jx]
  2178  			jx += incX
  2179  		}
  2180  		ix += incX
  2181  		offset += i + 2
  2182  	}
  2183  }
  2184  
  2185  // Sspr2 performs the symmetric rank-2 update
  2186  //  A += alpha * x * yᵀ + alpha * y * xᵀ
  2187  // where A is an n×n symmetric matrix in packed format, x and y are vectors,
  2188  // and alpha is a scalar.
  2189  //
  2190  // Float32 implementations are autogenerated and not directly tested.
  2191  func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) {
  2192  	if ul != blas.Lower && ul != blas.Upper {
  2193  		panic(badUplo)
  2194  	}
  2195  	if n < 0 {
  2196  		panic(nLT0)
  2197  	}
  2198  	if incX == 0 {
  2199  		panic(zeroIncX)
  2200  	}
  2201  	if incY == 0 {
  2202  		panic(zeroIncY)
  2203  	}
  2204  
  2205  	// Quick return if possible.
  2206  	if n == 0 {
  2207  		return
  2208  	}
  2209  
  2210  	// For zero matrix size the following slice length checks are trivially satisfied.
  2211  	if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) {
  2212  		panic(shortX)
  2213  	}
  2214  	if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) {
  2215  		panic(shortY)
  2216  	}
  2217  	if len(ap) < n*(n+1)/2 {
  2218  		panic(shortAP)
  2219  	}
  2220  
  2221  	// Quick return if possible.
  2222  	if alpha == 0 {
  2223  		return
  2224  	}
  2225  
  2226  	var ky, kx int
  2227  	if incY < 0 {
  2228  		ky = -(n - 1) * incY
  2229  	}
  2230  	if incX < 0 {
  2231  		kx = -(n - 1) * incX
  2232  	}
  2233  	var offset int // Offset is the index of (i,i).
  2234  	if ul == blas.Upper {
  2235  		if incX == 1 && incY == 1 {
  2236  			for i := 0; i < n; i++ {
  2237  				atmp := ap[offset:]
  2238  				xi := x[i]
  2239  				yi := y[i]
  2240  				xtmp := x[i:n]
  2241  				ytmp := y[i:n]
  2242  				for j, v := range xtmp {
  2243  					atmp[j] += alpha * (xi*ytmp[j] + v*yi)
  2244  				}
  2245  				offset += n - i
  2246  			}
  2247  			return
  2248  		}
  2249  		ix := kx
  2250  		iy := ky
  2251  		for i := 0; i < n; i++ {
  2252  			jx := kx + i*incX
  2253  			jy := ky + i*incY
  2254  			atmp := ap[offset:]
  2255  			xi := x[ix]
  2256  			yi := y[iy]
  2257  			for j := 0; j < n-i; j++ {
  2258  				atmp[j] += alpha * (xi*y[jy] + x[jx]*yi)
  2259  				jx += incX
  2260  				jy += incY
  2261  			}
  2262  			ix += incX
  2263  			iy += incY
  2264  			offset += n - i
  2265  		}
  2266  		return
  2267  	}
  2268  	if incX == 1 && incY == 1 {
  2269  		for i := 0; i < n; i++ {
  2270  			atmp := ap[offset-i:]
  2271  			xi := x[i]
  2272  			yi := y[i]
  2273  			xtmp := x[:i+1]
  2274  			for j, v := range xtmp {
  2275  				atmp[j] += alpha * (xi*y[j] + v*yi)
  2276  			}
  2277  			offset += i + 2
  2278  		}
  2279  		return
  2280  	}
  2281  	ix := kx
  2282  	iy := ky
  2283  	for i := 0; i < n; i++ {
  2284  		jx := kx
  2285  		jy := ky
  2286  		atmp := ap[offset-i:]
  2287  		for j := 0; j <= i; j++ {
  2288  			atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy])
  2289  			jx += incX
  2290  			jy += incY
  2291  		}
  2292  		ix += incX
  2293  		iy += incY
  2294  		offset += i + 2
  2295  	}
  2296  }