github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/blas/gonum/level3cmplx64.go (about)

     1  // Code generated by "go generate github.com/jingcheng-WU/gonum/blas/gonum”; DO NOT EDIT.
     2  
     3  // Copyright ©2019 The Gonum Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package gonum
     8  
     9  import (
    10  	cmplx "github.com/jingcheng-WU/gonum/internal/cmplx64"
    11  
    12  	"github.com/jingcheng-WU/gonum/blas"
    13  	"github.com/jingcheng-WU/gonum/internal/asm/c64"
    14  )
    15  
    16  var _ blas.Complex64Level3 = Implementation{}
    17  
    18  // Cgemm performs one of the matrix-matrix operations
    19  //  C = alpha * op(A) * op(B) + beta * C
    20  // where op(X) is one of
    21  //  op(X) = X  or  op(X) = Xᵀ  or  op(X) = Xᴴ,
    22  // alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix,
    23  // op(B) a k×n matrix and C an m×n matrix.
    24  //
    25  // Complex64 implementations are autogenerated and not directly tested.
    26  func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
    27  	switch tA {
    28  	default:
    29  		panic(badTranspose)
    30  	case blas.NoTrans, blas.Trans, blas.ConjTrans:
    31  	}
    32  	switch tB {
    33  	default:
    34  		panic(badTranspose)
    35  	case blas.NoTrans, blas.Trans, blas.ConjTrans:
    36  	}
    37  	switch {
    38  	case m < 0:
    39  		panic(mLT0)
    40  	case n < 0:
    41  		panic(nLT0)
    42  	case k < 0:
    43  		panic(kLT0)
    44  	}
    45  	rowA, colA := m, k
    46  	if tA != blas.NoTrans {
    47  		rowA, colA = k, m
    48  	}
    49  	if lda < max(1, colA) {
    50  		panic(badLdA)
    51  	}
    52  	rowB, colB := k, n
    53  	if tB != blas.NoTrans {
    54  		rowB, colB = n, k
    55  	}
    56  	if ldb < max(1, colB) {
    57  		panic(badLdB)
    58  	}
    59  	if ldc < max(1, n) {
    60  		panic(badLdC)
    61  	}
    62  
    63  	// Quick return if possible.
    64  	if m == 0 || n == 0 {
    65  		return
    66  	}
    67  
    68  	// For zero matrix size the following slice length checks are trivially satisfied.
    69  	if len(a) < (rowA-1)*lda+colA {
    70  		panic(shortA)
    71  	}
    72  	if len(b) < (rowB-1)*ldb+colB {
    73  		panic(shortB)
    74  	}
    75  	if len(c) < (m-1)*ldc+n {
    76  		panic(shortC)
    77  	}
    78  
    79  	// Quick return if possible.
    80  	if (alpha == 0 || k == 0) && beta == 1 {
    81  		return
    82  	}
    83  
    84  	if alpha == 0 {
    85  		if beta == 0 {
    86  			for i := 0; i < m; i++ {
    87  				for j := 0; j < n; j++ {
    88  					c[i*ldc+j] = 0
    89  				}
    90  			}
    91  		} else {
    92  			for i := 0; i < m; i++ {
    93  				for j := 0; j < n; j++ {
    94  					c[i*ldc+j] *= beta
    95  				}
    96  			}
    97  		}
    98  		return
    99  	}
   100  
   101  	switch tA {
   102  	case blas.NoTrans:
   103  		switch tB {
   104  		case blas.NoTrans:
   105  			// Form  C = alpha * A * B + beta * C.
   106  			for i := 0; i < m; i++ {
   107  				switch {
   108  				case beta == 0:
   109  					for j := 0; j < n; j++ {
   110  						c[i*ldc+j] = 0
   111  					}
   112  				case beta != 1:
   113  					for j := 0; j < n; j++ {
   114  						c[i*ldc+j] *= beta
   115  					}
   116  				}
   117  				for l := 0; l < k; l++ {
   118  					tmp := alpha * a[i*lda+l]
   119  					for j := 0; j < n; j++ {
   120  						c[i*ldc+j] += tmp * b[l*ldb+j]
   121  					}
   122  				}
   123  			}
   124  		case blas.Trans:
   125  			// Form  C = alpha * A * Bᵀ + beta * C.
   126  			for i := 0; i < m; i++ {
   127  				switch {
   128  				case beta == 0:
   129  					for j := 0; j < n; j++ {
   130  						c[i*ldc+j] = 0
   131  					}
   132  				case beta != 1:
   133  					for j := 0; j < n; j++ {
   134  						c[i*ldc+j] *= beta
   135  					}
   136  				}
   137  				for l := 0; l < k; l++ {
   138  					tmp := alpha * a[i*lda+l]
   139  					for j := 0; j < n; j++ {
   140  						c[i*ldc+j] += tmp * b[j*ldb+l]
   141  					}
   142  				}
   143  			}
   144  		case blas.ConjTrans:
   145  			// Form  C = alpha * A * Bᴴ + beta * C.
   146  			for i := 0; i < m; i++ {
   147  				switch {
   148  				case beta == 0:
   149  					for j := 0; j < n; j++ {
   150  						c[i*ldc+j] = 0
   151  					}
   152  				case beta != 1:
   153  					for j := 0; j < n; j++ {
   154  						c[i*ldc+j] *= beta
   155  					}
   156  				}
   157  				for l := 0; l < k; l++ {
   158  					tmp := alpha * a[i*lda+l]
   159  					for j := 0; j < n; j++ {
   160  						c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l])
   161  					}
   162  				}
   163  			}
   164  		}
   165  	case blas.Trans:
   166  		switch tB {
   167  		case blas.NoTrans:
   168  			// Form  C = alpha * Aᵀ * B + beta * C.
   169  			for i := 0; i < m; i++ {
   170  				for j := 0; j < n; j++ {
   171  					var tmp complex64
   172  					for l := 0; l < k; l++ {
   173  						tmp += a[l*lda+i] * b[l*ldb+j]
   174  					}
   175  					if beta == 0 {
   176  						c[i*ldc+j] = alpha * tmp
   177  					} else {
   178  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   179  					}
   180  				}
   181  			}
   182  		case blas.Trans:
   183  			// Form  C = alpha * Aᵀ * Bᵀ + beta * C.
   184  			for i := 0; i < m; i++ {
   185  				for j := 0; j < n; j++ {
   186  					var tmp complex64
   187  					for l := 0; l < k; l++ {
   188  						tmp += a[l*lda+i] * b[j*ldb+l]
   189  					}
   190  					if beta == 0 {
   191  						c[i*ldc+j] = alpha * tmp
   192  					} else {
   193  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   194  					}
   195  				}
   196  			}
   197  		case blas.ConjTrans:
   198  			// Form  C = alpha * Aᵀ * Bᴴ + beta * C.
   199  			for i := 0; i < m; i++ {
   200  				for j := 0; j < n; j++ {
   201  					var tmp complex64
   202  					for l := 0; l < k; l++ {
   203  						tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l])
   204  					}
   205  					if beta == 0 {
   206  						c[i*ldc+j] = alpha * tmp
   207  					} else {
   208  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   209  					}
   210  				}
   211  			}
   212  		}
   213  	case blas.ConjTrans:
   214  		switch tB {
   215  		case blas.NoTrans:
   216  			// Form  C = alpha * Aᴴ * B + beta * C.
   217  			for i := 0; i < m; i++ {
   218  				for j := 0; j < n; j++ {
   219  					var tmp complex64
   220  					for l := 0; l < k; l++ {
   221  						tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j]
   222  					}
   223  					if beta == 0 {
   224  						c[i*ldc+j] = alpha * tmp
   225  					} else {
   226  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   227  					}
   228  				}
   229  			}
   230  		case blas.Trans:
   231  			// Form  C = alpha * Aᴴ * Bᵀ + beta * C.
   232  			for i := 0; i < m; i++ {
   233  				for j := 0; j < n; j++ {
   234  					var tmp complex64
   235  					for l := 0; l < k; l++ {
   236  						tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l]
   237  					}
   238  					if beta == 0 {
   239  						c[i*ldc+j] = alpha * tmp
   240  					} else {
   241  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   242  					}
   243  				}
   244  			}
   245  		case blas.ConjTrans:
   246  			// Form  C = alpha * Aᴴ * Bᴴ + beta * C.
   247  			for i := 0; i < m; i++ {
   248  				for j := 0; j < n; j++ {
   249  					var tmp complex64
   250  					for l := 0; l < k; l++ {
   251  						tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l])
   252  					}
   253  					if beta == 0 {
   254  						c[i*ldc+j] = alpha * tmp
   255  					} else {
   256  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   257  					}
   258  				}
   259  			}
   260  		}
   261  	}
   262  }
   263  
   264  // Chemm performs one of the matrix-matrix operations
   265  //  C = alpha*A*B + beta*C  if side == blas.Left
   266  //  C = alpha*B*A + beta*C  if side == blas.Right
   267  // where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B
   268  // and C are m×n matrices. The imaginary parts of the diagonal elements of A are
   269  // assumed to be zero.
   270  //
   271  // Complex64 implementations are autogenerated and not directly tested.
   272  func (Implementation) Chemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
   273  	na := m
   274  	if side == blas.Right {
   275  		na = n
   276  	}
   277  	switch {
   278  	case side != blas.Left && side != blas.Right:
   279  		panic(badSide)
   280  	case uplo != blas.Lower && uplo != blas.Upper:
   281  		panic(badUplo)
   282  	case m < 0:
   283  		panic(mLT0)
   284  	case n < 0:
   285  		panic(nLT0)
   286  	case lda < max(1, na):
   287  		panic(badLdA)
   288  	case ldb < max(1, n):
   289  		panic(badLdB)
   290  	case ldc < max(1, n):
   291  		panic(badLdC)
   292  	}
   293  
   294  	// Quick return if possible.
   295  	if m == 0 || n == 0 {
   296  		return
   297  	}
   298  
   299  	// For zero matrix size the following slice length checks are trivially satisfied.
   300  	if len(a) < lda*(na-1)+na {
   301  		panic(shortA)
   302  	}
   303  	if len(b) < ldb*(m-1)+n {
   304  		panic(shortB)
   305  	}
   306  	if len(c) < ldc*(m-1)+n {
   307  		panic(shortC)
   308  	}
   309  
   310  	// Quick return if possible.
   311  	if alpha == 0 && beta == 1 {
   312  		return
   313  	}
   314  
   315  	if alpha == 0 {
   316  		if beta == 0 {
   317  			for i := 0; i < m; i++ {
   318  				ci := c[i*ldc : i*ldc+n]
   319  				for j := range ci {
   320  					ci[j] = 0
   321  				}
   322  			}
   323  		} else {
   324  			for i := 0; i < m; i++ {
   325  				ci := c[i*ldc : i*ldc+n]
   326  				c64.ScalUnitary(beta, ci)
   327  			}
   328  		}
   329  		return
   330  	}
   331  
   332  	if side == blas.Left {
   333  		// Form  C = alpha*A*B + beta*C.
   334  		for i := 0; i < m; i++ {
   335  			atmp := alpha * complex(real(a[i*lda+i]), 0)
   336  			bi := b[i*ldb : i*ldb+n]
   337  			ci := c[i*ldc : i*ldc+n]
   338  			if beta == 0 {
   339  				for j, bij := range bi {
   340  					ci[j] = atmp * bij
   341  				}
   342  			} else {
   343  				for j, bij := range bi {
   344  					ci[j] = atmp*bij + beta*ci[j]
   345  				}
   346  			}
   347  			if uplo == blas.Upper {
   348  				for k := 0; k < i; k++ {
   349  					atmp = alpha * cmplx.Conj(a[k*lda+i])
   350  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   351  				}
   352  				for k := i + 1; k < m; k++ {
   353  					atmp = alpha * a[i*lda+k]
   354  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   355  				}
   356  			} else {
   357  				for k := 0; k < i; k++ {
   358  					atmp = alpha * a[i*lda+k]
   359  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   360  				}
   361  				for k := i + 1; k < m; k++ {
   362  					atmp = alpha * cmplx.Conj(a[k*lda+i])
   363  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   364  				}
   365  			}
   366  		}
   367  	} else {
   368  		// Form  C = alpha*B*A + beta*C.
   369  		if uplo == blas.Upper {
   370  			for i := 0; i < m; i++ {
   371  				for j := n - 1; j >= 0; j-- {
   372  					abij := alpha * b[i*ldb+j]
   373  					aj := a[j*lda+j+1 : j*lda+n]
   374  					bi := b[i*ldb+j+1 : i*ldb+n]
   375  					ci := c[i*ldc+j+1 : i*ldc+n]
   376  					var tmp complex64
   377  					for k, ajk := range aj {
   378  						ci[k] += abij * ajk
   379  						tmp += bi[k] * cmplx.Conj(ajk)
   380  					}
   381  					ajj := complex(real(a[j*lda+j]), 0)
   382  					if beta == 0 {
   383  						c[i*ldc+j] = abij*ajj + alpha*tmp
   384  					} else {
   385  						c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j]
   386  					}
   387  				}
   388  			}
   389  		} else {
   390  			for i := 0; i < m; i++ {
   391  				for j := 0; j < n; j++ {
   392  					abij := alpha * b[i*ldb+j]
   393  					aj := a[j*lda : j*lda+j]
   394  					bi := b[i*ldb : i*ldb+j]
   395  					ci := c[i*ldc : i*ldc+j]
   396  					var tmp complex64
   397  					for k, ajk := range aj {
   398  						ci[k] += abij * ajk
   399  						tmp += bi[k] * cmplx.Conj(ajk)
   400  					}
   401  					ajj := complex(real(a[j*lda+j]), 0)
   402  					if beta == 0 {
   403  						c[i*ldc+j] = abij*ajj + alpha*tmp
   404  					} else {
   405  						c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j]
   406  					}
   407  				}
   408  			}
   409  		}
   410  	}
   411  }
   412  
   413  // Cherk performs one of the hermitian rank-k operations
   414  //  C = alpha*A*Aᴴ + beta*C  if trans == blas.NoTrans
   415  //  C = alpha*Aᴴ*A + beta*C  if trans == blas.ConjTrans
   416  // where alpha and beta are real scalars, C is an n×n hermitian matrix and A is
   417  // an n×k matrix in the first case and a k×n matrix in the second case.
   418  //
   419  // The imaginary parts of the diagonal elements of C are assumed to be zero, and
   420  // on return they will be set to zero.
   421  //
   422  // Complex64 implementations are autogenerated and not directly tested.
   423  func (Implementation) Cherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) {
   424  	var rowA, colA int
   425  	switch trans {
   426  	default:
   427  		panic(badTranspose)
   428  	case blas.NoTrans:
   429  		rowA, colA = n, k
   430  	case blas.ConjTrans:
   431  		rowA, colA = k, n
   432  	}
   433  	switch {
   434  	case uplo != blas.Lower && uplo != blas.Upper:
   435  		panic(badUplo)
   436  	case n < 0:
   437  		panic(nLT0)
   438  	case k < 0:
   439  		panic(kLT0)
   440  	case lda < max(1, colA):
   441  		panic(badLdA)
   442  	case ldc < max(1, n):
   443  		panic(badLdC)
   444  	}
   445  
   446  	// Quick return if possible.
   447  	if n == 0 {
   448  		return
   449  	}
   450  
   451  	// For zero matrix size the following slice length checks are trivially satisfied.
   452  	if len(a) < (rowA-1)*lda+colA {
   453  		panic(shortA)
   454  	}
   455  	if len(c) < (n-1)*ldc+n {
   456  		panic(shortC)
   457  	}
   458  
   459  	// Quick return if possible.
   460  	if (alpha == 0 || k == 0) && beta == 1 {
   461  		return
   462  	}
   463  
   464  	if alpha == 0 {
   465  		if uplo == blas.Upper {
   466  			if beta == 0 {
   467  				for i := 0; i < n; i++ {
   468  					ci := c[i*ldc+i : i*ldc+n]
   469  					for j := range ci {
   470  						ci[j] = 0
   471  					}
   472  				}
   473  			} else {
   474  				for i := 0; i < n; i++ {
   475  					ci := c[i*ldc+i : i*ldc+n]
   476  					ci[0] = complex(beta*real(ci[0]), 0)
   477  					if i != n-1 {
   478  						c64.SscalUnitary(beta, ci[1:])
   479  					}
   480  				}
   481  			}
   482  		} else {
   483  			if beta == 0 {
   484  				for i := 0; i < n; i++ {
   485  					ci := c[i*ldc : i*ldc+i+1]
   486  					for j := range ci {
   487  						ci[j] = 0
   488  					}
   489  				}
   490  			} else {
   491  				for i := 0; i < n; i++ {
   492  					ci := c[i*ldc : i*ldc+i+1]
   493  					if i != 0 {
   494  						c64.SscalUnitary(beta, ci[:i])
   495  					}
   496  					ci[i] = complex(beta*real(ci[i]), 0)
   497  				}
   498  			}
   499  		}
   500  		return
   501  	}
   502  
   503  	calpha := complex(alpha, 0)
   504  	if trans == blas.NoTrans {
   505  		// Form  C = alpha*A*Aᴴ + beta*C.
   506  		cbeta := complex(beta, 0)
   507  		if uplo == blas.Upper {
   508  			for i := 0; i < n; i++ {
   509  				ci := c[i*ldc+i : i*ldc+n]
   510  				ai := a[i*lda : i*lda+k]
   511  				switch {
   512  				case beta == 0:
   513  					// Handle the i-th diagonal element of C.
   514  					ci[0] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0)
   515  					// Handle the remaining elements on the i-th row of C.
   516  					for jc := range ci[1:] {
   517  						j := i + 1 + jc
   518  						ci[jc+1] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai)
   519  					}
   520  				case beta != 1:
   521  					cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[0]
   522  					ci[0] = complex(real(cii), 0)
   523  					for jc, cij := range ci[1:] {
   524  						j := i + 1 + jc
   525  						ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij
   526  					}
   527  				default:
   528  					cii := calpha*c64.DotcUnitary(ai, ai) + ci[0]
   529  					ci[0] = complex(real(cii), 0)
   530  					for jc, cij := range ci[1:] {
   531  						j := i + 1 + jc
   532  						ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij
   533  					}
   534  				}
   535  			}
   536  		} else {
   537  			for i := 0; i < n; i++ {
   538  				ci := c[i*ldc : i*ldc+i+1]
   539  				ai := a[i*lda : i*lda+k]
   540  				switch {
   541  				case beta == 0:
   542  					// Handle the first i-1 elements on the i-th row of C.
   543  					for j := range ci[:i] {
   544  						ci[j] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai)
   545  					}
   546  					// Handle the i-th diagonal element of C.
   547  					ci[i] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0)
   548  				case beta != 1:
   549  					for j, cij := range ci[:i] {
   550  						ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij
   551  					}
   552  					cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[i]
   553  					ci[i] = complex(real(cii), 0)
   554  				default:
   555  					for j, cij := range ci[:i] {
   556  						ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij
   557  					}
   558  					cii := calpha*c64.DotcUnitary(ai, ai) + ci[i]
   559  					ci[i] = complex(real(cii), 0)
   560  				}
   561  			}
   562  		}
   563  	} else {
   564  		// Form  C = alpha*Aᴴ*A + beta*C.
   565  		if uplo == blas.Upper {
   566  			for i := 0; i < n; i++ {
   567  				ci := c[i*ldc+i : i*ldc+n]
   568  				switch {
   569  				case beta == 0:
   570  					for jc := range ci {
   571  						ci[jc] = 0
   572  					}
   573  				case beta != 1:
   574  					c64.SscalUnitary(beta, ci)
   575  					ci[0] = complex(real(ci[0]), 0)
   576  				default:
   577  					ci[0] = complex(real(ci[0]), 0)
   578  				}
   579  				for j := 0; j < k; j++ {
   580  					aji := cmplx.Conj(a[j*lda+i])
   581  					if aji != 0 {
   582  						c64.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci)
   583  					}
   584  				}
   585  				c[i*ldc+i] = complex(real(c[i*ldc+i]), 0)
   586  			}
   587  		} else {
   588  			for i := 0; i < n; i++ {
   589  				ci := c[i*ldc : i*ldc+i+1]
   590  				switch {
   591  				case beta == 0:
   592  					for j := range ci {
   593  						ci[j] = 0
   594  					}
   595  				case beta != 1:
   596  					c64.SscalUnitary(beta, ci)
   597  					ci[i] = complex(real(ci[i]), 0)
   598  				default:
   599  					ci[i] = complex(real(ci[i]), 0)
   600  				}
   601  				for j := 0; j < k; j++ {
   602  					aji := cmplx.Conj(a[j*lda+i])
   603  					if aji != 0 {
   604  						c64.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci)
   605  					}
   606  				}
   607  				c[i*ldc+i] = complex(real(c[i*ldc+i]), 0)
   608  			}
   609  		}
   610  	}
   611  }
   612  
   613  // Cher2k performs one of the hermitian rank-2k operations
   614  //  C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C  if trans == blas.NoTrans
   615  //  C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C  if trans == blas.ConjTrans
   616  // where alpha and beta are scalars with beta real, C is an n×n hermitian matrix
   617  // and A and B are n×k matrices in the first case and k×n matrices in the second case.
   618  //
   619  // The imaginary parts of the diagonal elements of C are assumed to be zero, and
   620  // on return they will be set to zero.
   621  //
   622  // Complex64 implementations are autogenerated and not directly tested.
   623  func (Implementation) Cher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) {
   624  	var row, col int
   625  	switch trans {
   626  	default:
   627  		panic(badTranspose)
   628  	case blas.NoTrans:
   629  		row, col = n, k
   630  	case blas.ConjTrans:
   631  		row, col = k, n
   632  	}
   633  	switch {
   634  	case uplo != blas.Lower && uplo != blas.Upper:
   635  		panic(badUplo)
   636  	case n < 0:
   637  		panic(nLT0)
   638  	case k < 0:
   639  		panic(kLT0)
   640  	case lda < max(1, col):
   641  		panic(badLdA)
   642  	case ldb < max(1, col):
   643  		panic(badLdB)
   644  	case ldc < max(1, n):
   645  		panic(badLdC)
   646  	}
   647  
   648  	// Quick return if possible.
   649  	if n == 0 {
   650  		return
   651  	}
   652  
   653  	// For zero matrix size the following slice length checks are trivially satisfied.
   654  	if len(a) < (row-1)*lda+col {
   655  		panic(shortA)
   656  	}
   657  	if len(b) < (row-1)*ldb+col {
   658  		panic(shortB)
   659  	}
   660  	if len(c) < (n-1)*ldc+n {
   661  		panic(shortC)
   662  	}
   663  
   664  	// Quick return if possible.
   665  	if (alpha == 0 || k == 0) && beta == 1 {
   666  		return
   667  	}
   668  
   669  	if alpha == 0 {
   670  		if uplo == blas.Upper {
   671  			if beta == 0 {
   672  				for i := 0; i < n; i++ {
   673  					ci := c[i*ldc+i : i*ldc+n]
   674  					for j := range ci {
   675  						ci[j] = 0
   676  					}
   677  				}
   678  			} else {
   679  				for i := 0; i < n; i++ {
   680  					ci := c[i*ldc+i : i*ldc+n]
   681  					ci[0] = complex(beta*real(ci[0]), 0)
   682  					if i != n-1 {
   683  						c64.SscalUnitary(beta, ci[1:])
   684  					}
   685  				}
   686  			}
   687  		} else {
   688  			if beta == 0 {
   689  				for i := 0; i < n; i++ {
   690  					ci := c[i*ldc : i*ldc+i+1]
   691  					for j := range ci {
   692  						ci[j] = 0
   693  					}
   694  				}
   695  			} else {
   696  				for i := 0; i < n; i++ {
   697  					ci := c[i*ldc : i*ldc+i+1]
   698  					if i != 0 {
   699  						c64.SscalUnitary(beta, ci[:i])
   700  					}
   701  					ci[i] = complex(beta*real(ci[i]), 0)
   702  				}
   703  			}
   704  		}
   705  		return
   706  	}
   707  
   708  	conjalpha := cmplx.Conj(alpha)
   709  	cbeta := complex(beta, 0)
   710  	if trans == blas.NoTrans {
   711  		// Form  C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C.
   712  		if uplo == blas.Upper {
   713  			for i := 0; i < n; i++ {
   714  				ci := c[i*ldc+i+1 : i*ldc+n]
   715  				ai := a[i*lda : i*lda+k]
   716  				bi := b[i*ldb : i*ldb+k]
   717  				if beta == 0 {
   718  					cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi)
   719  					c[i*ldc+i] = complex(real(cii), 0)
   720  					for jc := range ci {
   721  						j := i + 1 + jc
   722  						ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi)
   723  					}
   724  				} else {
   725  					cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i]
   726  					c[i*ldc+i] = complex(real(cii), 0)
   727  					for jc, cij := range ci {
   728  						j := i + 1 + jc
   729  						ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij
   730  					}
   731  				}
   732  			}
   733  		} else {
   734  			for i := 0; i < n; i++ {
   735  				ci := c[i*ldc : i*ldc+i]
   736  				ai := a[i*lda : i*lda+k]
   737  				bi := b[i*ldb : i*ldb+k]
   738  				if beta == 0 {
   739  					for j := range ci {
   740  						ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi)
   741  					}
   742  					cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi)
   743  					c[i*ldc+i] = complex(real(cii), 0)
   744  				} else {
   745  					for j, cij := range ci {
   746  						ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij
   747  					}
   748  					cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i]
   749  					c[i*ldc+i] = complex(real(cii), 0)
   750  				}
   751  			}
   752  		}
   753  	} else {
   754  		// Form  C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C.
   755  		if uplo == blas.Upper {
   756  			for i := 0; i < n; i++ {
   757  				ci := c[i*ldc+i : i*ldc+n]
   758  				switch {
   759  				case beta == 0:
   760  					for jc := range ci {
   761  						ci[jc] = 0
   762  					}
   763  				case beta != 1:
   764  					c64.SscalUnitary(beta, ci)
   765  					ci[0] = complex(real(ci[0]), 0)
   766  				default:
   767  					ci[0] = complex(real(ci[0]), 0)
   768  				}
   769  				for j := 0; j < k; j++ {
   770  					aji := a[j*lda+i]
   771  					bji := b[j*ldb+i]
   772  					if aji != 0 {
   773  						c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci)
   774  					}
   775  					if bji != 0 {
   776  						c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci)
   777  					}
   778  				}
   779  				ci[0] = complex(real(ci[0]), 0)
   780  			}
   781  		} else {
   782  			for i := 0; i < n; i++ {
   783  				ci := c[i*ldc : i*ldc+i+1]
   784  				switch {
   785  				case beta == 0:
   786  					for j := range ci {
   787  						ci[j] = 0
   788  					}
   789  				case beta != 1:
   790  					c64.SscalUnitary(beta, ci)
   791  					ci[i] = complex(real(ci[i]), 0)
   792  				default:
   793  					ci[i] = complex(real(ci[i]), 0)
   794  				}
   795  				for j := 0; j < k; j++ {
   796  					aji := a[j*lda+i]
   797  					bji := b[j*ldb+i]
   798  					if aji != 0 {
   799  						c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci)
   800  					}
   801  					if bji != 0 {
   802  						c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci)
   803  					}
   804  				}
   805  				ci[i] = complex(real(ci[i]), 0)
   806  			}
   807  		}
   808  	}
   809  }
   810  
   811  // Csymm performs one of the matrix-matrix operations
   812  //  C = alpha*A*B + beta*C  if side == blas.Left
   813  //  C = alpha*B*A + beta*C  if side == blas.Right
   814  // where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B
   815  // and C are m×n matrices.
   816  //
   817  // Complex64 implementations are autogenerated and not directly tested.
   818  func (Implementation) Csymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
   819  	na := m
   820  	if side == blas.Right {
   821  		na = n
   822  	}
   823  	switch {
   824  	case side != blas.Left && side != blas.Right:
   825  		panic(badSide)
   826  	case uplo != blas.Lower && uplo != blas.Upper:
   827  		panic(badUplo)
   828  	case m < 0:
   829  		panic(mLT0)
   830  	case n < 0:
   831  		panic(nLT0)
   832  	case lda < max(1, na):
   833  		panic(badLdA)
   834  	case ldb < max(1, n):
   835  		panic(badLdB)
   836  	case ldc < max(1, n):
   837  		panic(badLdC)
   838  	}
   839  
   840  	// Quick return if possible.
   841  	if m == 0 || n == 0 {
   842  		return
   843  	}
   844  
   845  	// For zero matrix size the following slice length checks are trivially satisfied.
   846  	if len(a) < lda*(na-1)+na {
   847  		panic(shortA)
   848  	}
   849  	if len(b) < ldb*(m-1)+n {
   850  		panic(shortB)
   851  	}
   852  	if len(c) < ldc*(m-1)+n {
   853  		panic(shortC)
   854  	}
   855  
   856  	// Quick return if possible.
   857  	if alpha == 0 && beta == 1 {
   858  		return
   859  	}
   860  
   861  	if alpha == 0 {
   862  		if beta == 0 {
   863  			for i := 0; i < m; i++ {
   864  				ci := c[i*ldc : i*ldc+n]
   865  				for j := range ci {
   866  					ci[j] = 0
   867  				}
   868  			}
   869  		} else {
   870  			for i := 0; i < m; i++ {
   871  				ci := c[i*ldc : i*ldc+n]
   872  				c64.ScalUnitary(beta, ci)
   873  			}
   874  		}
   875  		return
   876  	}
   877  
   878  	if side == blas.Left {
   879  		// Form  C = alpha*A*B + beta*C.
   880  		for i := 0; i < m; i++ {
   881  			atmp := alpha * a[i*lda+i]
   882  			bi := b[i*ldb : i*ldb+n]
   883  			ci := c[i*ldc : i*ldc+n]
   884  			if beta == 0 {
   885  				for j, bij := range bi {
   886  					ci[j] = atmp * bij
   887  				}
   888  			} else {
   889  				for j, bij := range bi {
   890  					ci[j] = atmp*bij + beta*ci[j]
   891  				}
   892  			}
   893  			if uplo == blas.Upper {
   894  				for k := 0; k < i; k++ {
   895  					atmp = alpha * a[k*lda+i]
   896  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   897  				}
   898  				for k := i + 1; k < m; k++ {
   899  					atmp = alpha * a[i*lda+k]
   900  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   901  				}
   902  			} else {
   903  				for k := 0; k < i; k++ {
   904  					atmp = alpha * a[i*lda+k]
   905  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   906  				}
   907  				for k := i + 1; k < m; k++ {
   908  					atmp = alpha * a[k*lda+i]
   909  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   910  				}
   911  			}
   912  		}
   913  	} else {
   914  		// Form  C = alpha*B*A + beta*C.
   915  		if uplo == blas.Upper {
   916  			for i := 0; i < m; i++ {
   917  				for j := n - 1; j >= 0; j-- {
   918  					abij := alpha * b[i*ldb+j]
   919  					aj := a[j*lda+j+1 : j*lda+n]
   920  					bi := b[i*ldb+j+1 : i*ldb+n]
   921  					ci := c[i*ldc+j+1 : i*ldc+n]
   922  					var tmp complex64
   923  					for k, ajk := range aj {
   924  						ci[k] += abij * ajk
   925  						tmp += bi[k] * ajk
   926  					}
   927  					if beta == 0 {
   928  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp
   929  					} else {
   930  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j]
   931  					}
   932  				}
   933  			}
   934  		} else {
   935  			for i := 0; i < m; i++ {
   936  				for j := 0; j < n; j++ {
   937  					abij := alpha * b[i*ldb+j]
   938  					aj := a[j*lda : j*lda+j]
   939  					bi := b[i*ldb : i*ldb+j]
   940  					ci := c[i*ldc : i*ldc+j]
   941  					var tmp complex64
   942  					for k, ajk := range aj {
   943  						ci[k] += abij * ajk
   944  						tmp += bi[k] * ajk
   945  					}
   946  					if beta == 0 {
   947  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp
   948  					} else {
   949  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j]
   950  					}
   951  				}
   952  			}
   953  		}
   954  	}
   955  }
   956  
   957  // Csyrk performs one of the symmetric rank-k operations
   958  //  C = alpha*A*Aᵀ + beta*C  if trans == blas.NoTrans
   959  //  C = alpha*Aᵀ*A + beta*C  if trans == blas.Trans
   960  // where alpha and beta are scalars, C is an n×n symmetric matrix and A is
   961  // an n×k matrix in the first case and a k×n matrix in the second case.
   962  //
   963  // Complex64 implementations are autogenerated and not directly tested.
   964  func (Implementation) Csyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) {
   965  	var rowA, colA int
   966  	switch trans {
   967  	default:
   968  		panic(badTranspose)
   969  	case blas.NoTrans:
   970  		rowA, colA = n, k
   971  	case blas.Trans:
   972  		rowA, colA = k, n
   973  	}
   974  	switch {
   975  	case uplo != blas.Lower && uplo != blas.Upper:
   976  		panic(badUplo)
   977  	case n < 0:
   978  		panic(nLT0)
   979  	case k < 0:
   980  		panic(kLT0)
   981  	case lda < max(1, colA):
   982  		panic(badLdA)
   983  	case ldc < max(1, n):
   984  		panic(badLdC)
   985  	}
   986  
   987  	// Quick return if possible.
   988  	if n == 0 {
   989  		return
   990  	}
   991  
   992  	// For zero matrix size the following slice length checks are trivially satisfied.
   993  	if len(a) < (rowA-1)*lda+colA {
   994  		panic(shortA)
   995  	}
   996  	if len(c) < (n-1)*ldc+n {
   997  		panic(shortC)
   998  	}
   999  
  1000  	// Quick return if possible.
  1001  	if (alpha == 0 || k == 0) && beta == 1 {
  1002  		return
  1003  	}
  1004  
  1005  	if alpha == 0 {
  1006  		if uplo == blas.Upper {
  1007  			if beta == 0 {
  1008  				for i := 0; i < n; i++ {
  1009  					ci := c[i*ldc+i : i*ldc+n]
  1010  					for j := range ci {
  1011  						ci[j] = 0
  1012  					}
  1013  				}
  1014  			} else {
  1015  				for i := 0; i < n; i++ {
  1016  					ci := c[i*ldc+i : i*ldc+n]
  1017  					c64.ScalUnitary(beta, ci)
  1018  				}
  1019  			}
  1020  		} else {
  1021  			if beta == 0 {
  1022  				for i := 0; i < n; i++ {
  1023  					ci := c[i*ldc : i*ldc+i+1]
  1024  					for j := range ci {
  1025  						ci[j] = 0
  1026  					}
  1027  				}
  1028  			} else {
  1029  				for i := 0; i < n; i++ {
  1030  					ci := c[i*ldc : i*ldc+i+1]
  1031  					c64.ScalUnitary(beta, ci)
  1032  				}
  1033  			}
  1034  		}
  1035  		return
  1036  	}
  1037  
  1038  	if trans == blas.NoTrans {
  1039  		// Form  C = alpha*A*Aᵀ + beta*C.
  1040  		if uplo == blas.Upper {
  1041  			for i := 0; i < n; i++ {
  1042  				ci := c[i*ldc+i : i*ldc+n]
  1043  				ai := a[i*lda : i*lda+k]
  1044  				if beta == 0 {
  1045  					for jc := range ci {
  1046  						j := i + jc
  1047  						ci[jc] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k])
  1048  					}
  1049  				} else {
  1050  					for jc, cij := range ci {
  1051  						j := i + jc
  1052  						ci[jc] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k])
  1053  					}
  1054  				}
  1055  			}
  1056  		} else {
  1057  			for i := 0; i < n; i++ {
  1058  				ci := c[i*ldc : i*ldc+i+1]
  1059  				ai := a[i*lda : i*lda+k]
  1060  				if beta == 0 {
  1061  					for j := range ci {
  1062  						ci[j] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k])
  1063  					}
  1064  				} else {
  1065  					for j, cij := range ci {
  1066  						ci[j] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k])
  1067  					}
  1068  				}
  1069  			}
  1070  		}
  1071  	} else {
  1072  		// Form  C = alpha*Aᵀ*A + beta*C.
  1073  		if uplo == blas.Upper {
  1074  			for i := 0; i < n; i++ {
  1075  				ci := c[i*ldc+i : i*ldc+n]
  1076  				switch {
  1077  				case beta == 0:
  1078  					for jc := range ci {
  1079  						ci[jc] = 0
  1080  					}
  1081  				case beta != 1:
  1082  					for jc := range ci {
  1083  						ci[jc] *= beta
  1084  					}
  1085  				}
  1086  				for j := 0; j < k; j++ {
  1087  					aji := a[j*lda+i]
  1088  					if aji != 0 {
  1089  						c64.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci)
  1090  					}
  1091  				}
  1092  			}
  1093  		} else {
  1094  			for i := 0; i < n; i++ {
  1095  				ci := c[i*ldc : i*ldc+i+1]
  1096  				switch {
  1097  				case beta == 0:
  1098  					for j := range ci {
  1099  						ci[j] = 0
  1100  					}
  1101  				case beta != 1:
  1102  					for j := range ci {
  1103  						ci[j] *= beta
  1104  					}
  1105  				}
  1106  				for j := 0; j < k; j++ {
  1107  					aji := a[j*lda+i]
  1108  					if aji != 0 {
  1109  						c64.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci)
  1110  					}
  1111  				}
  1112  			}
  1113  		}
  1114  	}
  1115  }
  1116  
  1117  // Csyr2k performs one of the symmetric rank-2k operations
  1118  //  C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C  if trans == blas.NoTrans
  1119  //  C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C  if trans == blas.Trans
  1120  // where alpha and beta are scalars, C is an n×n symmetric matrix and A and B
  1121  // are n×k matrices in the first case and k×n matrices in the second case.
  1122  //
  1123  // Complex64 implementations are autogenerated and not directly tested.
  1124  func (Implementation) Csyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
  1125  	var row, col int
  1126  	switch trans {
  1127  	default:
  1128  		panic(badTranspose)
  1129  	case blas.NoTrans:
  1130  		row, col = n, k
  1131  	case blas.Trans:
  1132  		row, col = k, n
  1133  	}
  1134  	switch {
  1135  	case uplo != blas.Lower && uplo != blas.Upper:
  1136  		panic(badUplo)
  1137  	case n < 0:
  1138  		panic(nLT0)
  1139  	case k < 0:
  1140  		panic(kLT0)
  1141  	case lda < max(1, col):
  1142  		panic(badLdA)
  1143  	case ldb < max(1, col):
  1144  		panic(badLdB)
  1145  	case ldc < max(1, n):
  1146  		panic(badLdC)
  1147  	}
  1148  
  1149  	// Quick return if possible.
  1150  	if n == 0 {
  1151  		return
  1152  	}
  1153  
  1154  	// For zero matrix size the following slice length checks are trivially satisfied.
  1155  	if len(a) < (row-1)*lda+col {
  1156  		panic(shortA)
  1157  	}
  1158  	if len(b) < (row-1)*ldb+col {
  1159  		panic(shortB)
  1160  	}
  1161  	if len(c) < (n-1)*ldc+n {
  1162  		panic(shortC)
  1163  	}
  1164  
  1165  	// Quick return if possible.
  1166  	if (alpha == 0 || k == 0) && beta == 1 {
  1167  		return
  1168  	}
  1169  
  1170  	if alpha == 0 {
  1171  		if uplo == blas.Upper {
  1172  			if beta == 0 {
  1173  				for i := 0; i < n; i++ {
  1174  					ci := c[i*ldc+i : i*ldc+n]
  1175  					for j := range ci {
  1176  						ci[j] = 0
  1177  					}
  1178  				}
  1179  			} else {
  1180  				for i := 0; i < n; i++ {
  1181  					ci := c[i*ldc+i : i*ldc+n]
  1182  					c64.ScalUnitary(beta, ci)
  1183  				}
  1184  			}
  1185  		} else {
  1186  			if beta == 0 {
  1187  				for i := 0; i < n; i++ {
  1188  					ci := c[i*ldc : i*ldc+i+1]
  1189  					for j := range ci {
  1190  						ci[j] = 0
  1191  					}
  1192  				}
  1193  			} else {
  1194  				for i := 0; i < n; i++ {
  1195  					ci := c[i*ldc : i*ldc+i+1]
  1196  					c64.ScalUnitary(beta, ci)
  1197  				}
  1198  			}
  1199  		}
  1200  		return
  1201  	}
  1202  
  1203  	if trans == blas.NoTrans {
  1204  		// Form  C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C.
  1205  		if uplo == blas.Upper {
  1206  			for i := 0; i < n; i++ {
  1207  				ci := c[i*ldc+i : i*ldc+n]
  1208  				ai := a[i*lda : i*lda+k]
  1209  				bi := b[i*ldb : i*ldb+k]
  1210  				if beta == 0 {
  1211  					for jc := range ci {
  1212  						j := i + jc
  1213  						ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k])
  1214  					}
  1215  				} else {
  1216  					for jc, cij := range ci {
  1217  						j := i + jc
  1218  						ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij
  1219  					}
  1220  				}
  1221  			}
  1222  		} else {
  1223  			for i := 0; i < n; i++ {
  1224  				ci := c[i*ldc : i*ldc+i+1]
  1225  				ai := a[i*lda : i*lda+k]
  1226  				bi := b[i*ldb : i*ldb+k]
  1227  				if beta == 0 {
  1228  					for j := range ci {
  1229  						ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k])
  1230  					}
  1231  				} else {
  1232  					for j, cij := range ci {
  1233  						ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij
  1234  					}
  1235  				}
  1236  			}
  1237  		}
  1238  	} else {
  1239  		// Form  C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C.
  1240  		if uplo == blas.Upper {
  1241  			for i := 0; i < n; i++ {
  1242  				ci := c[i*ldc+i : i*ldc+n]
  1243  				switch {
  1244  				case beta == 0:
  1245  					for jc := range ci {
  1246  						ci[jc] = 0
  1247  					}
  1248  				case beta != 1:
  1249  					for jc := range ci {
  1250  						ci[jc] *= beta
  1251  					}
  1252  				}
  1253  				for j := 0; j < k; j++ {
  1254  					aji := a[j*lda+i]
  1255  					bji := b[j*ldb+i]
  1256  					if aji != 0 {
  1257  						c64.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci)
  1258  					}
  1259  					if bji != 0 {
  1260  						c64.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci)
  1261  					}
  1262  				}
  1263  			}
  1264  		} else {
  1265  			for i := 0; i < n; i++ {
  1266  				ci := c[i*ldc : i*ldc+i+1]
  1267  				switch {
  1268  				case beta == 0:
  1269  					for j := range ci {
  1270  						ci[j] = 0
  1271  					}
  1272  				case beta != 1:
  1273  					for j := range ci {
  1274  						ci[j] *= beta
  1275  					}
  1276  				}
  1277  				for j := 0; j < k; j++ {
  1278  					aji := a[j*lda+i]
  1279  					bji := b[j*ldb+i]
  1280  					if aji != 0 {
  1281  						c64.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci)
  1282  					}
  1283  					if bji != 0 {
  1284  						c64.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci)
  1285  					}
  1286  				}
  1287  			}
  1288  		}
  1289  	}
  1290  }
  1291  
  1292  // Ctrmm performs one of the matrix-matrix operations
  1293  //  B = alpha * op(A) * B  if side == blas.Left,
  1294  //  B = alpha * B * op(A)  if side == blas.Right,
  1295  // where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit,
  1296  // upper or lower triangular matrix and op(A) is one of
  1297  //  op(A) = A   if trans == blas.NoTrans,
  1298  //  op(A) = Aᵀ  if trans == blas.Trans,
  1299  //  op(A) = Aᴴ  if trans == blas.ConjTrans.
  1300  //
  1301  // Complex64 implementations are autogenerated and not directly tested.
  1302  func (Implementation) Ctrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
  1303  	na := m
  1304  	if side == blas.Right {
  1305  		na = n
  1306  	}
  1307  	switch {
  1308  	case side != blas.Left && side != blas.Right:
  1309  		panic(badSide)
  1310  	case uplo != blas.Lower && uplo != blas.Upper:
  1311  		panic(badUplo)
  1312  	case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans:
  1313  		panic(badTranspose)
  1314  	case diag != blas.Unit && diag != blas.NonUnit:
  1315  		panic(badDiag)
  1316  	case m < 0:
  1317  		panic(mLT0)
  1318  	case n < 0:
  1319  		panic(nLT0)
  1320  	case lda < max(1, na):
  1321  		panic(badLdA)
  1322  	case ldb < max(1, n):
  1323  		panic(badLdB)
  1324  	}
  1325  
  1326  	// Quick return if possible.
  1327  	if m == 0 || n == 0 {
  1328  		return
  1329  	}
  1330  
  1331  	// For zero matrix size the following slice length checks are trivially satisfied.
  1332  	if len(a) < (na-1)*lda+na {
  1333  		panic(shortA)
  1334  	}
  1335  	if len(b) < (m-1)*ldb+n {
  1336  		panic(shortB)
  1337  	}
  1338  
  1339  	// Quick return if possible.
  1340  	if alpha == 0 {
  1341  		for i := 0; i < m; i++ {
  1342  			bi := b[i*ldb : i*ldb+n]
  1343  			for j := range bi {
  1344  				bi[j] = 0
  1345  			}
  1346  		}
  1347  		return
  1348  	}
  1349  
  1350  	noConj := trans != blas.ConjTrans
  1351  	noUnit := diag == blas.NonUnit
  1352  	if side == blas.Left {
  1353  		if trans == blas.NoTrans {
  1354  			// Form B = alpha*A*B.
  1355  			if uplo == blas.Upper {
  1356  				for i := 0; i < m; i++ {
  1357  					aii := alpha
  1358  					if noUnit {
  1359  						aii *= a[i*lda+i]
  1360  					}
  1361  					bi := b[i*ldb : i*ldb+n]
  1362  					for j := range bi {
  1363  						bi[j] *= aii
  1364  					}
  1365  					for ja, aij := range a[i*lda+i+1 : i*lda+m] {
  1366  						j := ja + i + 1
  1367  						if aij != 0 {
  1368  							c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi)
  1369  						}
  1370  					}
  1371  				}
  1372  			} else {
  1373  				for i := m - 1; i >= 0; i-- {
  1374  					aii := alpha
  1375  					if noUnit {
  1376  						aii *= a[i*lda+i]
  1377  					}
  1378  					bi := b[i*ldb : i*ldb+n]
  1379  					for j := range bi {
  1380  						bi[j] *= aii
  1381  					}
  1382  					for j, aij := range a[i*lda : i*lda+i] {
  1383  						if aij != 0 {
  1384  							c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi)
  1385  						}
  1386  					}
  1387  				}
  1388  			}
  1389  		} else {
  1390  			// Form B = alpha*Aᵀ*B  or  B = alpha*Aᴴ*B.
  1391  			if uplo == blas.Upper {
  1392  				for k := m - 1; k >= 0; k-- {
  1393  					bk := b[k*ldb : k*ldb+n]
  1394  					for ja, ajk := range a[k*lda+k+1 : k*lda+m] {
  1395  						if ajk == 0 {
  1396  							continue
  1397  						}
  1398  						j := k + 1 + ja
  1399  						if noConj {
  1400  							c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n])
  1401  						} else {
  1402  							c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n])
  1403  						}
  1404  					}
  1405  					akk := alpha
  1406  					if noUnit {
  1407  						if noConj {
  1408  							akk *= a[k*lda+k]
  1409  						} else {
  1410  							akk *= cmplx.Conj(a[k*lda+k])
  1411  						}
  1412  					}
  1413  					if akk != 1 {
  1414  						c64.ScalUnitary(akk, bk)
  1415  					}
  1416  				}
  1417  			} else {
  1418  				for k := 0; k < m; k++ {
  1419  					bk := b[k*ldb : k*ldb+n]
  1420  					for j, ajk := range a[k*lda : k*lda+k] {
  1421  						if ajk == 0 {
  1422  							continue
  1423  						}
  1424  						if noConj {
  1425  							c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n])
  1426  						} else {
  1427  							c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n])
  1428  						}
  1429  					}
  1430  					akk := alpha
  1431  					if noUnit {
  1432  						if noConj {
  1433  							akk *= a[k*lda+k]
  1434  						} else {
  1435  							akk *= cmplx.Conj(a[k*lda+k])
  1436  						}
  1437  					}
  1438  					if akk != 1 {
  1439  						c64.ScalUnitary(akk, bk)
  1440  					}
  1441  				}
  1442  			}
  1443  		}
  1444  	} else {
  1445  		if trans == blas.NoTrans {
  1446  			// Form B = alpha*B*A.
  1447  			if uplo == blas.Upper {
  1448  				for i := 0; i < m; i++ {
  1449  					bi := b[i*ldb : i*ldb+n]
  1450  					for k := n - 1; k >= 0; k-- {
  1451  						abik := alpha * bi[k]
  1452  						if abik == 0 {
  1453  							continue
  1454  						}
  1455  						bi[k] = abik
  1456  						if noUnit {
  1457  							bi[k] *= a[k*lda+k]
  1458  						}
  1459  						c64.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:])
  1460  					}
  1461  				}
  1462  			} else {
  1463  				for i := 0; i < m; i++ {
  1464  					bi := b[i*ldb : i*ldb+n]
  1465  					for k := 0; k < n; k++ {
  1466  						abik := alpha * bi[k]
  1467  						if abik == 0 {
  1468  							continue
  1469  						}
  1470  						bi[k] = abik
  1471  						if noUnit {
  1472  							bi[k] *= a[k*lda+k]
  1473  						}
  1474  						c64.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k])
  1475  					}
  1476  				}
  1477  			}
  1478  		} else {
  1479  			// Form B = alpha*B*Aᵀ  or  B = alpha*B*Aᴴ.
  1480  			if uplo == blas.Upper {
  1481  				for i := 0; i < m; i++ {
  1482  					bi := b[i*ldb : i*ldb+n]
  1483  					for j, bij := range bi {
  1484  						if noConj {
  1485  							if noUnit {
  1486  								bij *= a[j*lda+j]
  1487  							}
  1488  							bij += c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1489  						} else {
  1490  							if noUnit {
  1491  								bij *= cmplx.Conj(a[j*lda+j])
  1492  							}
  1493  							bij += c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1494  						}
  1495  						bi[j] = alpha * bij
  1496  					}
  1497  				}
  1498  			} else {
  1499  				for i := 0; i < m; i++ {
  1500  					bi := b[i*ldb : i*ldb+n]
  1501  					for j := n - 1; j >= 0; j-- {
  1502  						bij := bi[j]
  1503  						if noConj {
  1504  							if noUnit {
  1505  								bij *= a[j*lda+j]
  1506  							}
  1507  							bij += c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j])
  1508  						} else {
  1509  							if noUnit {
  1510  								bij *= cmplx.Conj(a[j*lda+j])
  1511  							}
  1512  							bij += c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j])
  1513  						}
  1514  						bi[j] = alpha * bij
  1515  					}
  1516  				}
  1517  			}
  1518  		}
  1519  	}
  1520  }
  1521  
  1522  // Ctrsm solves one of the matrix equations
  1523  //  op(A) * X = alpha * B  if side == blas.Left,
  1524  //  X * op(A) = alpha * B  if side == blas.Right,
  1525  // where alpha is a scalar, X and B are m×n matrices, A is a unit or
  1526  // non-unit, upper or lower triangular matrix and op(A) is one of
  1527  //  op(A) = A   if transA == blas.NoTrans,
  1528  //  op(A) = Aᵀ  if transA == blas.Trans,
  1529  //  op(A) = Aᴴ  if transA == blas.ConjTrans.
  1530  // On return the matrix X is overwritten on B.
  1531  //
  1532  // Complex64 implementations are autogenerated and not directly tested.
  1533  func (Implementation) Ctrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
  1534  	na := m
  1535  	if side == blas.Right {
  1536  		na = n
  1537  	}
  1538  	switch {
  1539  	case side != blas.Left && side != blas.Right:
  1540  		panic(badSide)
  1541  	case uplo != blas.Lower && uplo != blas.Upper:
  1542  		panic(badUplo)
  1543  	case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans:
  1544  		panic(badTranspose)
  1545  	case diag != blas.Unit && diag != blas.NonUnit:
  1546  		panic(badDiag)
  1547  	case m < 0:
  1548  		panic(mLT0)
  1549  	case n < 0:
  1550  		panic(nLT0)
  1551  	case lda < max(1, na):
  1552  		panic(badLdA)
  1553  	case ldb < max(1, n):
  1554  		panic(badLdB)
  1555  	}
  1556  
  1557  	// Quick return if possible.
  1558  	if m == 0 || n == 0 {
  1559  		return
  1560  	}
  1561  
  1562  	// For zero matrix size the following slice length checks are trivially satisfied.
  1563  	if len(a) < (na-1)*lda+na {
  1564  		panic(shortA)
  1565  	}
  1566  	if len(b) < (m-1)*ldb+n {
  1567  		panic(shortB)
  1568  	}
  1569  
  1570  	if alpha == 0 {
  1571  		for i := 0; i < m; i++ {
  1572  			for j := 0; j < n; j++ {
  1573  				b[i*ldb+j] = 0
  1574  			}
  1575  		}
  1576  		return
  1577  	}
  1578  
  1579  	noConj := transA != blas.ConjTrans
  1580  	noUnit := diag == blas.NonUnit
  1581  	if side == blas.Left {
  1582  		if transA == blas.NoTrans {
  1583  			// Form  B = alpha*inv(A)*B.
  1584  			if uplo == blas.Upper {
  1585  				for i := m - 1; i >= 0; i-- {
  1586  					bi := b[i*ldb : i*ldb+n]
  1587  					if alpha != 1 {
  1588  						c64.ScalUnitary(alpha, bi)
  1589  					}
  1590  					for ka, aik := range a[i*lda+i+1 : i*lda+m] {
  1591  						k := i + 1 + ka
  1592  						if aik != 0 {
  1593  							c64.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi)
  1594  						}
  1595  					}
  1596  					if noUnit {
  1597  						c64.ScalUnitary(1/a[i*lda+i], bi)
  1598  					}
  1599  				}
  1600  			} else {
  1601  				for i := 0; i < m; i++ {
  1602  					bi := b[i*ldb : i*ldb+n]
  1603  					if alpha != 1 {
  1604  						c64.ScalUnitary(alpha, bi)
  1605  					}
  1606  					for j, aij := range a[i*lda : i*lda+i] {
  1607  						if aij != 0 {
  1608  							c64.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi)
  1609  						}
  1610  					}
  1611  					if noUnit {
  1612  						c64.ScalUnitary(1/a[i*lda+i], bi)
  1613  					}
  1614  				}
  1615  			}
  1616  		} else {
  1617  			// Form  B = alpha*inv(Aᵀ)*B  or  B = alpha*inv(Aᴴ)*B.
  1618  			if uplo == blas.Upper {
  1619  				for i := 0; i < m; i++ {
  1620  					bi := b[i*ldb : i*ldb+n]
  1621  					if noUnit {
  1622  						if noConj {
  1623  							c64.ScalUnitary(1/a[i*lda+i], bi)
  1624  						} else {
  1625  							c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi)
  1626  						}
  1627  					}
  1628  					for ja, aij := range a[i*lda+i+1 : i*lda+m] {
  1629  						if aij == 0 {
  1630  							continue
  1631  						}
  1632  						j := i + 1 + ja
  1633  						if noConj {
  1634  							c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n])
  1635  						} else {
  1636  							c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n])
  1637  						}
  1638  					}
  1639  					if alpha != 1 {
  1640  						c64.ScalUnitary(alpha, bi)
  1641  					}
  1642  				}
  1643  			} else {
  1644  				for i := m - 1; i >= 0; i-- {
  1645  					bi := b[i*ldb : i*ldb+n]
  1646  					if noUnit {
  1647  						if noConj {
  1648  							c64.ScalUnitary(1/a[i*lda+i], bi)
  1649  						} else {
  1650  							c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi)
  1651  						}
  1652  					}
  1653  					for j, aij := range a[i*lda : i*lda+i] {
  1654  						if aij == 0 {
  1655  							continue
  1656  						}
  1657  						if noConj {
  1658  							c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n])
  1659  						} else {
  1660  							c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n])
  1661  						}
  1662  					}
  1663  					if alpha != 1 {
  1664  						c64.ScalUnitary(alpha, bi)
  1665  					}
  1666  				}
  1667  			}
  1668  		}
  1669  	} else {
  1670  		if transA == blas.NoTrans {
  1671  			// Form  B = alpha*B*inv(A).
  1672  			if uplo == blas.Upper {
  1673  				for i := 0; i < m; i++ {
  1674  					bi := b[i*ldb : i*ldb+n]
  1675  					if alpha != 1 {
  1676  						c64.ScalUnitary(alpha, bi)
  1677  					}
  1678  					for j, bij := range bi {
  1679  						if bij == 0 {
  1680  							continue
  1681  						}
  1682  						if noUnit {
  1683  							bi[j] /= a[j*lda+j]
  1684  						}
  1685  						c64.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1686  					}
  1687  				}
  1688  			} else {
  1689  				for i := 0; i < m; i++ {
  1690  					bi := b[i*ldb : i*ldb+n]
  1691  					if alpha != 1 {
  1692  						c64.ScalUnitary(alpha, bi)
  1693  					}
  1694  					for j := n - 1; j >= 0; j-- {
  1695  						if bi[j] == 0 {
  1696  							continue
  1697  						}
  1698  						if noUnit {
  1699  							bi[j] /= a[j*lda+j]
  1700  						}
  1701  						c64.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j])
  1702  					}
  1703  				}
  1704  			}
  1705  		} else {
  1706  			// Form  B = alpha*B*inv(Aᵀ)  or   B = alpha*B*inv(Aᴴ).
  1707  			if uplo == blas.Upper {
  1708  				for i := 0; i < m; i++ {
  1709  					bi := b[i*ldb : i*ldb+n]
  1710  					for j := n - 1; j >= 0; j-- {
  1711  						bij := alpha * bi[j]
  1712  						if noConj {
  1713  							bij -= c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1714  							if noUnit {
  1715  								bij /= a[j*lda+j]
  1716  							}
  1717  						} else {
  1718  							bij -= c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1719  							if noUnit {
  1720  								bij /= cmplx.Conj(a[j*lda+j])
  1721  							}
  1722  						}
  1723  						bi[j] = bij
  1724  					}
  1725  				}
  1726  			} else {
  1727  				for i := 0; i < m; i++ {
  1728  					bi := b[i*ldb : i*ldb+n]
  1729  					for j, bij := range bi {
  1730  						bij *= alpha
  1731  						if noConj {
  1732  							bij -= c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j])
  1733  							if noUnit {
  1734  								bij /= a[j*lda+j]
  1735  							}
  1736  						} else {
  1737  							bij -= c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j])
  1738  							if noUnit {
  1739  								bij /= cmplx.Conj(a[j*lda+j])
  1740  							}
  1741  						}
  1742  						bi[j] = bij
  1743  					}
  1744  				}
  1745  			}
  1746  		}
  1747  	}
  1748  }