gonum.org/v1/gonum@v0.14.0/blas/gonum/level3float32.go (about)

     1  // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
     2  
     3  // Copyright ©2014 The Gonum Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package gonum
     8  
     9  import (
    10  	"gonum.org/v1/gonum/blas"
    11  	"gonum.org/v1/gonum/internal/asm/f32"
    12  )
    13  
    14  var _ blas.Float32Level3 = Implementation{}
    15  
    16  // Strsm solves one of the matrix equations
    17  //
    18  //	A * X = alpha * B   if tA == blas.NoTrans and side == blas.Left
    19  //	Aᵀ * X = alpha * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
    20  //	X * A = alpha * B   if tA == blas.NoTrans and side == blas.Right
    21  //	X * Aᵀ = alpha * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
    22  //
    23  // where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a
    24  // scalar.
    25  //
    26  // At entry to the function, X contains the values of B, and the result is
    27  // stored in-place into X.
    28  //
    29  // No check is made that A is invertible.
    30  //
    31  // Float32 implementations are autogenerated and not directly tested.
    32  func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
    33  	if s != blas.Left && s != blas.Right {
    34  		panic(badSide)
    35  	}
    36  	if ul != blas.Lower && ul != blas.Upper {
    37  		panic(badUplo)
    38  	}
    39  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
    40  		panic(badTranspose)
    41  	}
    42  	if d != blas.NonUnit && d != blas.Unit {
    43  		panic(badDiag)
    44  	}
    45  	if m < 0 {
    46  		panic(mLT0)
    47  	}
    48  	if n < 0 {
    49  		panic(nLT0)
    50  	}
    51  	k := n
    52  	if s == blas.Left {
    53  		k = m
    54  	}
    55  	if lda < max(1, k) {
    56  		panic(badLdA)
    57  	}
    58  	if ldb < max(1, n) {
    59  		panic(badLdB)
    60  	}
    61  
    62  	// Quick return if possible.
    63  	if m == 0 || n == 0 {
    64  		return
    65  	}
    66  
    67  	// For zero matrix size the following slice length checks are trivially satisfied.
    68  	if len(a) < lda*(k-1)+k {
    69  		panic(shortA)
    70  	}
    71  	if len(b) < ldb*(m-1)+n {
    72  		panic(shortB)
    73  	}
    74  
    75  	if alpha == 0 {
    76  		for i := 0; i < m; i++ {
    77  			btmp := b[i*ldb : i*ldb+n]
    78  			for j := range btmp {
    79  				btmp[j] = 0
    80  			}
    81  		}
    82  		return
    83  	}
    84  	nonUnit := d == blas.NonUnit
    85  	if s == blas.Left {
    86  		if tA == blas.NoTrans {
    87  			if ul == blas.Upper {
    88  				for i := m - 1; i >= 0; i-- {
    89  					btmp := b[i*ldb : i*ldb+n]
    90  					if alpha != 1 {
    91  						f32.ScalUnitary(alpha, btmp)
    92  					}
    93  					for ka, va := range a[i*lda+i+1 : i*lda+m] {
    94  						if va != 0 {
    95  							k := ka + i + 1
    96  							f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp)
    97  						}
    98  					}
    99  					if nonUnit {
   100  						tmp := 1 / a[i*lda+i]
   101  						f32.ScalUnitary(tmp, btmp)
   102  					}
   103  				}
   104  				return
   105  			}
   106  			for i := 0; i < m; i++ {
   107  				btmp := b[i*ldb : i*ldb+n]
   108  				if alpha != 1 {
   109  					f32.ScalUnitary(alpha, btmp)
   110  				}
   111  				for k, va := range a[i*lda : i*lda+i] {
   112  					if va != 0 {
   113  						f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp)
   114  					}
   115  				}
   116  				if nonUnit {
   117  					tmp := 1 / a[i*lda+i]
   118  					f32.ScalUnitary(tmp, btmp)
   119  				}
   120  			}
   121  			return
   122  		}
   123  		// Cases where a is transposed
   124  		if ul == blas.Upper {
   125  			for k := 0; k < m; k++ {
   126  				btmpk := b[k*ldb : k*ldb+n]
   127  				if nonUnit {
   128  					tmp := 1 / a[k*lda+k]
   129  					f32.ScalUnitary(tmp, btmpk)
   130  				}
   131  				for ia, va := range a[k*lda+k+1 : k*lda+m] {
   132  					if va != 0 {
   133  						i := ia + k + 1
   134  						f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n])
   135  					}
   136  				}
   137  				if alpha != 1 {
   138  					f32.ScalUnitary(alpha, btmpk)
   139  				}
   140  			}
   141  			return
   142  		}
   143  		for k := m - 1; k >= 0; k-- {
   144  			btmpk := b[k*ldb : k*ldb+n]
   145  			if nonUnit {
   146  				tmp := 1 / a[k*lda+k]
   147  				f32.ScalUnitary(tmp, btmpk)
   148  			}
   149  			for i, va := range a[k*lda : k*lda+k] {
   150  				if va != 0 {
   151  					f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n])
   152  				}
   153  			}
   154  			if alpha != 1 {
   155  				f32.ScalUnitary(alpha, btmpk)
   156  			}
   157  		}
   158  		return
   159  	}
   160  	// Cases where a is to the right of X.
   161  	if tA == blas.NoTrans {
   162  		if ul == blas.Upper {
   163  			for i := 0; i < m; i++ {
   164  				btmp := b[i*ldb : i*ldb+n]
   165  				if alpha != 1 {
   166  					f32.ScalUnitary(alpha, btmp)
   167  				}
   168  				for k, vb := range btmp {
   169  					if vb == 0 {
   170  						continue
   171  					}
   172  					if nonUnit {
   173  						btmp[k] /= a[k*lda+k]
   174  					}
   175  					f32.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n])
   176  				}
   177  			}
   178  			return
   179  		}
   180  		for i := 0; i < m; i++ {
   181  			btmp := b[i*ldb : i*ldb+n]
   182  			if alpha != 1 {
   183  				f32.ScalUnitary(alpha, btmp)
   184  			}
   185  			for k := n - 1; k >= 0; k-- {
   186  				if btmp[k] == 0 {
   187  					continue
   188  				}
   189  				if nonUnit {
   190  					btmp[k] /= a[k*lda+k]
   191  				}
   192  				f32.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k])
   193  			}
   194  		}
   195  		return
   196  	}
   197  	// Cases where a is transposed.
   198  	if ul == blas.Upper {
   199  		for i := 0; i < m; i++ {
   200  			btmp := b[i*ldb : i*ldb+n]
   201  			for j := n - 1; j >= 0; j-- {
   202  				tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
   203  				if nonUnit {
   204  					tmp /= a[j*lda+j]
   205  				}
   206  				btmp[j] = tmp
   207  			}
   208  		}
   209  		return
   210  	}
   211  	for i := 0; i < m; i++ {
   212  		btmp := b[i*ldb : i*ldb+n]
   213  		for j := 0; j < n; j++ {
   214  			tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
   215  			if nonUnit {
   216  				tmp /= a[j*lda+j]
   217  			}
   218  			btmp[j] = tmp
   219  		}
   220  	}
   221  }
   222  
   223  // Ssymm performs one of the matrix-matrix operations
   224  //
   225  //	C = alpha * A * B + beta * C  if side == blas.Left
   226  //	C = alpha * B * A + beta * C  if side == blas.Right
   227  //
   228  // where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
   229  // is a scalar.
   230  //
   231  // Float32 implementations are autogenerated and not directly tested.
   232  func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
   233  	if s != blas.Right && s != blas.Left {
   234  		panic(badSide)
   235  	}
   236  	if ul != blas.Lower && ul != blas.Upper {
   237  		panic(badUplo)
   238  	}
   239  	if m < 0 {
   240  		panic(mLT0)
   241  	}
   242  	if n < 0 {
   243  		panic(nLT0)
   244  	}
   245  	k := n
   246  	if s == blas.Left {
   247  		k = m
   248  	}
   249  	if lda < max(1, k) {
   250  		panic(badLdA)
   251  	}
   252  	if ldb < max(1, n) {
   253  		panic(badLdB)
   254  	}
   255  	if ldc < max(1, n) {
   256  		panic(badLdC)
   257  	}
   258  
   259  	// Quick return if possible.
   260  	if m == 0 || n == 0 {
   261  		return
   262  	}
   263  
   264  	// For zero matrix size the following slice length checks are trivially satisfied.
   265  	if len(a) < lda*(k-1)+k {
   266  		panic(shortA)
   267  	}
   268  	if len(b) < ldb*(m-1)+n {
   269  		panic(shortB)
   270  	}
   271  	if len(c) < ldc*(m-1)+n {
   272  		panic(shortC)
   273  	}
   274  
   275  	// Quick return if possible.
   276  	if alpha == 0 && beta == 1 {
   277  		return
   278  	}
   279  
   280  	if beta == 0 {
   281  		for i := 0; i < m; i++ {
   282  			ctmp := c[i*ldc : i*ldc+n]
   283  			for j := range ctmp {
   284  				ctmp[j] = 0
   285  			}
   286  		}
   287  	}
   288  
   289  	if alpha == 0 {
   290  		if beta != 0 {
   291  			for i := 0; i < m; i++ {
   292  				ctmp := c[i*ldc : i*ldc+n]
   293  				for j := 0; j < n; j++ {
   294  					ctmp[j] *= beta
   295  				}
   296  			}
   297  		}
   298  		return
   299  	}
   300  
   301  	isUpper := ul == blas.Upper
   302  	if s == blas.Left {
   303  		for i := 0; i < m; i++ {
   304  			atmp := alpha * a[i*lda+i]
   305  			btmp := b[i*ldb : i*ldb+n]
   306  			ctmp := c[i*ldc : i*ldc+n]
   307  			for j, v := range btmp {
   308  				ctmp[j] *= beta
   309  				ctmp[j] += atmp * v
   310  			}
   311  
   312  			for k := 0; k < i; k++ {
   313  				var atmp float32
   314  				if isUpper {
   315  					atmp = a[k*lda+i]
   316  				} else {
   317  					atmp = a[i*lda+k]
   318  				}
   319  				atmp *= alpha
   320  				f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp)
   321  			}
   322  			for k := i + 1; k < m; k++ {
   323  				var atmp float32
   324  				if isUpper {
   325  					atmp = a[i*lda+k]
   326  				} else {
   327  					atmp = a[k*lda+i]
   328  				}
   329  				atmp *= alpha
   330  				f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp)
   331  			}
   332  		}
   333  		return
   334  	}
   335  	if isUpper {
   336  		for i := 0; i < m; i++ {
   337  			for j := n - 1; j >= 0; j-- {
   338  				tmp := alpha * b[i*ldb+j]
   339  				var tmp2 float32
   340  				atmp := a[j*lda+j+1 : j*lda+n]
   341  				btmp := b[i*ldb+j+1 : i*ldb+n]
   342  				ctmp := c[i*ldc+j+1 : i*ldc+n]
   343  				for k, v := range atmp {
   344  					ctmp[k] += tmp * v
   345  					tmp2 += btmp[k] * v
   346  				}
   347  				c[i*ldc+j] *= beta
   348  				c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
   349  			}
   350  		}
   351  		return
   352  	}
   353  	for i := 0; i < m; i++ {
   354  		for j := 0; j < n; j++ {
   355  			tmp := alpha * b[i*ldb+j]
   356  			var tmp2 float32
   357  			atmp := a[j*lda : j*lda+j]
   358  			btmp := b[i*ldb : i*ldb+j]
   359  			ctmp := c[i*ldc : i*ldc+j]
   360  			for k, v := range atmp {
   361  				ctmp[k] += tmp * v
   362  				tmp2 += btmp[k] * v
   363  			}
   364  			c[i*ldc+j] *= beta
   365  			c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
   366  		}
   367  	}
   368  }
   369  
   370  // Ssyrk performs one of the symmetric rank-k operations
   371  //
   372  //	C = alpha * A * Aᵀ + beta * C  if tA == blas.NoTrans
   373  //	C = alpha * Aᵀ * A + beta * C  if tA == blas.Trans or tA == blas.ConjTrans
   374  //
   375  // where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and
   376  // beta are scalars.
   377  //
   378  // Float32 implementations are autogenerated and not directly tested.
   379  func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) {
   380  	if ul != blas.Lower && ul != blas.Upper {
   381  		panic(badUplo)
   382  	}
   383  	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
   384  		panic(badTranspose)
   385  	}
   386  	if n < 0 {
   387  		panic(nLT0)
   388  	}
   389  	if k < 0 {
   390  		panic(kLT0)
   391  	}
   392  	row, col := k, n
   393  	if tA == blas.NoTrans {
   394  		row, col = n, k
   395  	}
   396  	if lda < max(1, col) {
   397  		panic(badLdA)
   398  	}
   399  	if ldc < max(1, n) {
   400  		panic(badLdC)
   401  	}
   402  
   403  	// Quick return if possible.
   404  	if n == 0 {
   405  		return
   406  	}
   407  
   408  	// For zero matrix size the following slice length checks are trivially satisfied.
   409  	if len(a) < lda*(row-1)+col {
   410  		panic(shortA)
   411  	}
   412  	if len(c) < ldc*(n-1)+n {
   413  		panic(shortC)
   414  	}
   415  
   416  	if alpha == 0 {
   417  		if beta == 0 {
   418  			if ul == blas.Upper {
   419  				for i := 0; i < n; i++ {
   420  					ctmp := c[i*ldc+i : i*ldc+n]
   421  					for j := range ctmp {
   422  						ctmp[j] = 0
   423  					}
   424  				}
   425  				return
   426  			}
   427  			for i := 0; i < n; i++ {
   428  				ctmp := c[i*ldc : i*ldc+i+1]
   429  				for j := range ctmp {
   430  					ctmp[j] = 0
   431  				}
   432  			}
   433  			return
   434  		}
   435  		if ul == blas.Upper {
   436  			for i := 0; i < n; i++ {
   437  				ctmp := c[i*ldc+i : i*ldc+n]
   438  				for j := range ctmp {
   439  					ctmp[j] *= beta
   440  				}
   441  			}
   442  			return
   443  		}
   444  		for i := 0; i < n; i++ {
   445  			ctmp := c[i*ldc : i*ldc+i+1]
   446  			for j := range ctmp {
   447  				ctmp[j] *= beta
   448  			}
   449  		}
   450  		return
   451  	}
   452  	if tA == blas.NoTrans {
   453  		if ul == blas.Upper {
   454  			for i := 0; i < n; i++ {
   455  				ctmp := c[i*ldc+i : i*ldc+n]
   456  				atmp := a[i*lda : i*lda+k]
   457  				if beta == 0 {
   458  					for jc := range ctmp {
   459  						j := jc + i
   460  						ctmp[jc] = alpha * f32.DotUnitary(atmp, a[j*lda:j*lda+k])
   461  					}
   462  				} else {
   463  					for jc, vc := range ctmp {
   464  						j := jc + i
   465  						ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k])
   466  					}
   467  				}
   468  			}
   469  			return
   470  		}
   471  		for i := 0; i < n; i++ {
   472  			ctmp := c[i*ldc : i*ldc+i+1]
   473  			atmp := a[i*lda : i*lda+k]
   474  			if beta == 0 {
   475  				for j := range ctmp {
   476  					ctmp[j] = alpha * f32.DotUnitary(a[j*lda:j*lda+k], atmp)
   477  				}
   478  			} else {
   479  				for j, vc := range ctmp {
   480  					ctmp[j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp)
   481  				}
   482  			}
   483  		}
   484  		return
   485  	}
   486  	// Cases where a is transposed.
   487  	if ul == blas.Upper {
   488  		for i := 0; i < n; i++ {
   489  			ctmp := c[i*ldc+i : i*ldc+n]
   490  			if beta == 0 {
   491  				for j := range ctmp {
   492  					ctmp[j] = 0
   493  				}
   494  			} else if beta != 1 {
   495  				for j := range ctmp {
   496  					ctmp[j] *= beta
   497  				}
   498  			}
   499  			for l := 0; l < k; l++ {
   500  				tmp := alpha * a[l*lda+i]
   501  				if tmp != 0 {
   502  					f32.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp)
   503  				}
   504  			}
   505  		}
   506  		return
   507  	}
   508  	for i := 0; i < n; i++ {
   509  		ctmp := c[i*ldc : i*ldc+i+1]
   510  		if beta != 1 {
   511  			for j := range ctmp {
   512  				ctmp[j] *= beta
   513  			}
   514  		}
   515  		for l := 0; l < k; l++ {
   516  			tmp := alpha * a[l*lda+i]
   517  			if tmp != 0 {
   518  				f32.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp)
   519  			}
   520  		}
   521  	}
   522  }
   523  
   524  // Ssyr2k performs one of the symmetric rank 2k operations
   525  //
   526  //	C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C  if tA == blas.NoTrans
   527  //	C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C  if tA == blas.Trans or tA == blas.ConjTrans
   528  //
   529  // where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and
   530  // alpha and beta are scalars.
   531  //
   532  // Float32 implementations are autogenerated and not directly tested.
   533  func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) {
   534  	if ul != blas.Lower && ul != blas.Upper {
   535  		panic(badUplo)
   536  	}
   537  	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
   538  		panic(badTranspose)
   539  	}
   540  	if n < 0 {
   541  		panic(nLT0)
   542  	}
   543  	if k < 0 {
   544  		panic(kLT0)
   545  	}
   546  	row, col := k, n
   547  	if tA == blas.NoTrans {
   548  		row, col = n, k
   549  	}
   550  	if lda < max(1, col) {
   551  		panic(badLdA)
   552  	}
   553  	if ldb < max(1, col) {
   554  		panic(badLdB)
   555  	}
   556  	if ldc < max(1, n) {
   557  		panic(badLdC)
   558  	}
   559  
   560  	// Quick return if possible.
   561  	if n == 0 {
   562  		return
   563  	}
   564  
   565  	// For zero matrix size the following slice length checks are trivially satisfied.
   566  	if len(a) < lda*(row-1)+col {
   567  		panic(shortA)
   568  	}
   569  	if len(b) < ldb*(row-1)+col {
   570  		panic(shortB)
   571  	}
   572  	if len(c) < ldc*(n-1)+n {
   573  		panic(shortC)
   574  	}
   575  
   576  	if alpha == 0 {
   577  		if beta == 0 {
   578  			if ul == blas.Upper {
   579  				for i := 0; i < n; i++ {
   580  					ctmp := c[i*ldc+i : i*ldc+n]
   581  					for j := range ctmp {
   582  						ctmp[j] = 0
   583  					}
   584  				}
   585  				return
   586  			}
   587  			for i := 0; i < n; i++ {
   588  				ctmp := c[i*ldc : i*ldc+i+1]
   589  				for j := range ctmp {
   590  					ctmp[j] = 0
   591  				}
   592  			}
   593  			return
   594  		}
   595  		if ul == blas.Upper {
   596  			for i := 0; i < n; i++ {
   597  				ctmp := c[i*ldc+i : i*ldc+n]
   598  				for j := range ctmp {
   599  					ctmp[j] *= beta
   600  				}
   601  			}
   602  			return
   603  		}
   604  		for i := 0; i < n; i++ {
   605  			ctmp := c[i*ldc : i*ldc+i+1]
   606  			for j := range ctmp {
   607  				ctmp[j] *= beta
   608  			}
   609  		}
   610  		return
   611  	}
   612  	if tA == blas.NoTrans {
   613  		if ul == blas.Upper {
   614  			for i := 0; i < n; i++ {
   615  				atmp := a[i*lda : i*lda+k]
   616  				btmp := b[i*ldb : i*ldb+k]
   617  				ctmp := c[i*ldc+i : i*ldc+n]
   618  				if beta == 0 {
   619  					for jc := range ctmp {
   620  						j := i + jc
   621  						var tmp1, tmp2 float32
   622  						binner := b[j*ldb : j*ldb+k]
   623  						for l, v := range a[j*lda : j*lda+k] {
   624  							tmp1 += v * btmp[l]
   625  							tmp2 += atmp[l] * binner[l]
   626  						}
   627  						ctmp[jc] = alpha * (tmp1 + tmp2)
   628  					}
   629  				} else {
   630  					for jc := range ctmp {
   631  						j := i + jc
   632  						var tmp1, tmp2 float32
   633  						binner := b[j*ldb : j*ldb+k]
   634  						for l, v := range a[j*lda : j*lda+k] {
   635  							tmp1 += v * btmp[l]
   636  							tmp2 += atmp[l] * binner[l]
   637  						}
   638  						ctmp[jc] *= beta
   639  						ctmp[jc] += alpha * (tmp1 + tmp2)
   640  					}
   641  				}
   642  			}
   643  			return
   644  		}
   645  		for i := 0; i < n; i++ {
   646  			atmp := a[i*lda : i*lda+k]
   647  			btmp := b[i*ldb : i*ldb+k]
   648  			ctmp := c[i*ldc : i*ldc+i+1]
   649  			if beta == 0 {
   650  				for j := 0; j <= i; j++ {
   651  					var tmp1, tmp2 float32
   652  					binner := b[j*ldb : j*ldb+k]
   653  					for l, v := range a[j*lda : j*lda+k] {
   654  						tmp1 += v * btmp[l]
   655  						tmp2 += atmp[l] * binner[l]
   656  					}
   657  					ctmp[j] = alpha * (tmp1 + tmp2)
   658  				}
   659  			} else {
   660  				for j := 0; j <= i; j++ {
   661  					var tmp1, tmp2 float32
   662  					binner := b[j*ldb : j*ldb+k]
   663  					for l, v := range a[j*lda : j*lda+k] {
   664  						tmp1 += v * btmp[l]
   665  						tmp2 += atmp[l] * binner[l]
   666  					}
   667  					ctmp[j] *= beta
   668  					ctmp[j] += alpha * (tmp1 + tmp2)
   669  				}
   670  			}
   671  		}
   672  		return
   673  	}
   674  	if ul == blas.Upper {
   675  		for i := 0; i < n; i++ {
   676  			ctmp := c[i*ldc+i : i*ldc+n]
   677  			switch beta {
   678  			case 0:
   679  				for j := range ctmp {
   680  					ctmp[j] = 0
   681  				}
   682  			case 1:
   683  			default:
   684  				for j := range ctmp {
   685  					ctmp[j] *= beta
   686  				}
   687  			}
   688  			for l := 0; l < k; l++ {
   689  				tmp1 := alpha * b[l*ldb+i]
   690  				tmp2 := alpha * a[l*lda+i]
   691  				btmp := b[l*ldb+i : l*ldb+n]
   692  				if tmp1 != 0 || tmp2 != 0 {
   693  					for j, v := range a[l*lda+i : l*lda+n] {
   694  						ctmp[j] += v*tmp1 + btmp[j]*tmp2
   695  					}
   696  				}
   697  			}
   698  		}
   699  		return
   700  	}
   701  	for i := 0; i < n; i++ {
   702  		ctmp := c[i*ldc : i*ldc+i+1]
   703  		switch beta {
   704  		case 0:
   705  			for j := range ctmp {
   706  				ctmp[j] = 0
   707  			}
   708  		case 1:
   709  		default:
   710  			for j := range ctmp {
   711  				ctmp[j] *= beta
   712  			}
   713  		}
   714  		for l := 0; l < k; l++ {
   715  			tmp1 := alpha * b[l*ldb+i]
   716  			tmp2 := alpha * a[l*lda+i]
   717  			btmp := b[l*ldb : l*ldb+i+1]
   718  			if tmp1 != 0 || tmp2 != 0 {
   719  				for j, v := range a[l*lda : l*lda+i+1] {
   720  					ctmp[j] += v*tmp1 + btmp[j]*tmp2
   721  				}
   722  			}
   723  		}
   724  	}
   725  }
   726  
   727  // Strmm performs one of the matrix-matrix operations
   728  //
   729  //	B = alpha * A * B   if tA == blas.NoTrans and side == blas.Left
   730  //	B = alpha * Aᵀ * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
   731  //	B = alpha * B * A   if tA == blas.NoTrans and side == blas.Right
   732  //	B = alpha * B * Aᵀ  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
   733  //
   734  // where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar.
   735  //
   736  // Float32 implementations are autogenerated and not directly tested.
   737  func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) {
   738  	if s != blas.Left && s != blas.Right {
   739  		panic(badSide)
   740  	}
   741  	if ul != blas.Lower && ul != blas.Upper {
   742  		panic(badUplo)
   743  	}
   744  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
   745  		panic(badTranspose)
   746  	}
   747  	if d != blas.NonUnit && d != blas.Unit {
   748  		panic(badDiag)
   749  	}
   750  	if m < 0 {
   751  		panic(mLT0)
   752  	}
   753  	if n < 0 {
   754  		panic(nLT0)
   755  	}
   756  	k := n
   757  	if s == blas.Left {
   758  		k = m
   759  	}
   760  	if lda < max(1, k) {
   761  		panic(badLdA)
   762  	}
   763  	if ldb < max(1, n) {
   764  		panic(badLdB)
   765  	}
   766  
   767  	// Quick return if possible.
   768  	if m == 0 || n == 0 {
   769  		return
   770  	}
   771  
   772  	// For zero matrix size the following slice length checks are trivially satisfied.
   773  	if len(a) < lda*(k-1)+k {
   774  		panic(shortA)
   775  	}
   776  	if len(b) < ldb*(m-1)+n {
   777  		panic(shortB)
   778  	}
   779  
   780  	if alpha == 0 {
   781  		for i := 0; i < m; i++ {
   782  			btmp := b[i*ldb : i*ldb+n]
   783  			for j := range btmp {
   784  				btmp[j] = 0
   785  			}
   786  		}
   787  		return
   788  	}
   789  
   790  	nonUnit := d == blas.NonUnit
   791  	if s == blas.Left {
   792  		if tA == blas.NoTrans {
   793  			if ul == blas.Upper {
   794  				for i := 0; i < m; i++ {
   795  					tmp := alpha
   796  					if nonUnit {
   797  						tmp *= a[i*lda+i]
   798  					}
   799  					btmp := b[i*ldb : i*ldb+n]
   800  					f32.ScalUnitary(tmp, btmp)
   801  					for ka, va := range a[i*lda+i+1 : i*lda+m] {
   802  						k := ka + i + 1
   803  						if va != 0 {
   804  							f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp)
   805  						}
   806  					}
   807  				}
   808  				return
   809  			}
   810  			for i := m - 1; i >= 0; i-- {
   811  				tmp := alpha
   812  				if nonUnit {
   813  					tmp *= a[i*lda+i]
   814  				}
   815  				btmp := b[i*ldb : i*ldb+n]
   816  				f32.ScalUnitary(tmp, btmp)
   817  				for k, va := range a[i*lda : i*lda+i] {
   818  					if va != 0 {
   819  						f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp)
   820  					}
   821  				}
   822  			}
   823  			return
   824  		}
   825  		// Cases where a is transposed.
   826  		if ul == blas.Upper {
   827  			for k := m - 1; k >= 0; k-- {
   828  				btmpk := b[k*ldb : k*ldb+n]
   829  				for ia, va := range a[k*lda+k+1 : k*lda+m] {
   830  					i := ia + k + 1
   831  					btmp := b[i*ldb : i*ldb+n]
   832  					if va != 0 {
   833  						f32.AxpyUnitary(alpha*va, btmpk, btmp)
   834  					}
   835  				}
   836  				tmp := alpha
   837  				if nonUnit {
   838  					tmp *= a[k*lda+k]
   839  				}
   840  				if tmp != 1 {
   841  					f32.ScalUnitary(tmp, btmpk)
   842  				}
   843  			}
   844  			return
   845  		}
   846  		for k := 0; k < m; k++ {
   847  			btmpk := b[k*ldb : k*ldb+n]
   848  			for i, va := range a[k*lda : k*lda+k] {
   849  				btmp := b[i*ldb : i*ldb+n]
   850  				if va != 0 {
   851  					f32.AxpyUnitary(alpha*va, btmpk, btmp)
   852  				}
   853  			}
   854  			tmp := alpha
   855  			if nonUnit {
   856  				tmp *= a[k*lda+k]
   857  			}
   858  			if tmp != 1 {
   859  				f32.ScalUnitary(tmp, btmpk)
   860  			}
   861  		}
   862  		return
   863  	}
   864  	// Cases where a is on the right
   865  	if tA == blas.NoTrans {
   866  		if ul == blas.Upper {
   867  			for i := 0; i < m; i++ {
   868  				btmp := b[i*ldb : i*ldb+n]
   869  				for k := n - 1; k >= 0; k-- {
   870  					tmp := alpha * btmp[k]
   871  					if tmp == 0 {
   872  						continue
   873  					}
   874  					btmp[k] = tmp
   875  					if nonUnit {
   876  						btmp[k] *= a[k*lda+k]
   877  					}
   878  					f32.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n])
   879  				}
   880  			}
   881  			return
   882  		}
   883  		for i := 0; i < m; i++ {
   884  			btmp := b[i*ldb : i*ldb+n]
   885  			for k := 0; k < n; k++ {
   886  				tmp := alpha * btmp[k]
   887  				if tmp == 0 {
   888  					continue
   889  				}
   890  				btmp[k] = tmp
   891  				if nonUnit {
   892  					btmp[k] *= a[k*lda+k]
   893  				}
   894  				f32.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k])
   895  			}
   896  		}
   897  		return
   898  	}
   899  	// Cases where a is transposed.
   900  	if ul == blas.Upper {
   901  		for i := 0; i < m; i++ {
   902  			btmp := b[i*ldb : i*ldb+n]
   903  			for j, vb := range btmp {
   904  				tmp := vb
   905  				if nonUnit {
   906  					tmp *= a[j*lda+j]
   907  				}
   908  				tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
   909  				btmp[j] = alpha * tmp
   910  			}
   911  		}
   912  		return
   913  	}
   914  	for i := 0; i < m; i++ {
   915  		btmp := b[i*ldb : i*ldb+n]
   916  		for j := n - 1; j >= 0; j-- {
   917  			tmp := btmp[j]
   918  			if nonUnit {
   919  				tmp *= a[j*lda+j]
   920  			}
   921  			tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
   922  			btmp[j] = alpha * tmp
   923  		}
   924  	}
   925  }