gonum.org/v1/gonum@v0.14.0/blas/gonum/level3cmplx64.go (about)

     1  // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT.
     2  
     3  // Copyright ©2019 The Gonum Authors. All rights reserved.
     4  // Use of this source code is governed by a BSD-style
     5  // license that can be found in the LICENSE file.
     6  
     7  package gonum
     8  
     9  import (
    10  	cmplx "gonum.org/v1/gonum/internal/cmplx64"
    11  
    12  	"gonum.org/v1/gonum/blas"
    13  	"gonum.org/v1/gonum/internal/asm/c64"
    14  )
    15  
    16  var _ blas.Complex64Level3 = Implementation{}
    17  
    18  // Cgemm performs one of the matrix-matrix operations
    19  //
    20  //	C = alpha * op(A) * op(B) + beta * C
    21  //
    22  // where op(X) is one of
    23  //
    24  //	op(X) = X  or  op(X) = Xᵀ  or  op(X) = Xᴴ,
    25  //
    26  // alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix,
    27  // op(B) a k×n matrix and C an m×n matrix.
    28  //
    29  // Complex64 implementations are autogenerated and not directly tested.
    30  func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
    31  	switch tA {
    32  	default:
    33  		panic(badTranspose)
    34  	case blas.NoTrans, blas.Trans, blas.ConjTrans:
    35  	}
    36  	switch tB {
    37  	default:
    38  		panic(badTranspose)
    39  	case blas.NoTrans, blas.Trans, blas.ConjTrans:
    40  	}
    41  	switch {
    42  	case m < 0:
    43  		panic(mLT0)
    44  	case n < 0:
    45  		panic(nLT0)
    46  	case k < 0:
    47  		panic(kLT0)
    48  	}
    49  	rowA, colA := m, k
    50  	if tA != blas.NoTrans {
    51  		rowA, colA = k, m
    52  	}
    53  	if lda < max(1, colA) {
    54  		panic(badLdA)
    55  	}
    56  	rowB, colB := k, n
    57  	if tB != blas.NoTrans {
    58  		rowB, colB = n, k
    59  	}
    60  	if ldb < max(1, colB) {
    61  		panic(badLdB)
    62  	}
    63  	if ldc < max(1, n) {
    64  		panic(badLdC)
    65  	}
    66  
    67  	// Quick return if possible.
    68  	if m == 0 || n == 0 {
    69  		return
    70  	}
    71  
    72  	// For zero matrix size the following slice length checks are trivially satisfied.
    73  	if len(a) < (rowA-1)*lda+colA {
    74  		panic(shortA)
    75  	}
    76  	if len(b) < (rowB-1)*ldb+colB {
    77  		panic(shortB)
    78  	}
    79  	if len(c) < (m-1)*ldc+n {
    80  		panic(shortC)
    81  	}
    82  
    83  	// Quick return if possible.
    84  	if (alpha == 0 || k == 0) && beta == 1 {
    85  		return
    86  	}
    87  
    88  	if alpha == 0 {
    89  		if beta == 0 {
    90  			for i := 0; i < m; i++ {
    91  				for j := 0; j < n; j++ {
    92  					c[i*ldc+j] = 0
    93  				}
    94  			}
    95  		} else {
    96  			for i := 0; i < m; i++ {
    97  				for j := 0; j < n; j++ {
    98  					c[i*ldc+j] *= beta
    99  				}
   100  			}
   101  		}
   102  		return
   103  	}
   104  
   105  	switch tA {
   106  	case blas.NoTrans:
   107  		switch tB {
   108  		case blas.NoTrans:
   109  			// Form  C = alpha * A * B + beta * C.
   110  			for i := 0; i < m; i++ {
   111  				switch {
   112  				case beta == 0:
   113  					for j := 0; j < n; j++ {
   114  						c[i*ldc+j] = 0
   115  					}
   116  				case beta != 1:
   117  					for j := 0; j < n; j++ {
   118  						c[i*ldc+j] *= beta
   119  					}
   120  				}
   121  				for l := 0; l < k; l++ {
   122  					tmp := alpha * a[i*lda+l]
   123  					for j := 0; j < n; j++ {
   124  						c[i*ldc+j] += tmp * b[l*ldb+j]
   125  					}
   126  				}
   127  			}
   128  		case blas.Trans:
   129  			// Form  C = alpha * A * Bᵀ + beta * C.
   130  			for i := 0; i < m; i++ {
   131  				switch {
   132  				case beta == 0:
   133  					for j := 0; j < n; j++ {
   134  						c[i*ldc+j] = 0
   135  					}
   136  				case beta != 1:
   137  					for j := 0; j < n; j++ {
   138  						c[i*ldc+j] *= beta
   139  					}
   140  				}
   141  				for l := 0; l < k; l++ {
   142  					tmp := alpha * a[i*lda+l]
   143  					for j := 0; j < n; j++ {
   144  						c[i*ldc+j] += tmp * b[j*ldb+l]
   145  					}
   146  				}
   147  			}
   148  		case blas.ConjTrans:
   149  			// Form  C = alpha * A * Bᴴ + beta * C.
   150  			for i := 0; i < m; i++ {
   151  				switch {
   152  				case beta == 0:
   153  					for j := 0; j < n; j++ {
   154  						c[i*ldc+j] = 0
   155  					}
   156  				case beta != 1:
   157  					for j := 0; j < n; j++ {
   158  						c[i*ldc+j] *= beta
   159  					}
   160  				}
   161  				for l := 0; l < k; l++ {
   162  					tmp := alpha * a[i*lda+l]
   163  					for j := 0; j < n; j++ {
   164  						c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l])
   165  					}
   166  				}
   167  			}
   168  		}
   169  	case blas.Trans:
   170  		switch tB {
   171  		case blas.NoTrans:
   172  			// Form  C = alpha * Aᵀ * B + beta * C.
   173  			for i := 0; i < m; i++ {
   174  				for j := 0; j < n; j++ {
   175  					var tmp complex64
   176  					for l := 0; l < k; l++ {
   177  						tmp += a[l*lda+i] * b[l*ldb+j]
   178  					}
   179  					if beta == 0 {
   180  						c[i*ldc+j] = alpha * tmp
   181  					} else {
   182  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   183  					}
   184  				}
   185  			}
   186  		case blas.Trans:
   187  			// Form  C = alpha * Aᵀ * Bᵀ + beta * C.
   188  			for i := 0; i < m; i++ {
   189  				for j := 0; j < n; j++ {
   190  					var tmp complex64
   191  					for l := 0; l < k; l++ {
   192  						tmp += a[l*lda+i] * b[j*ldb+l]
   193  					}
   194  					if beta == 0 {
   195  						c[i*ldc+j] = alpha * tmp
   196  					} else {
   197  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   198  					}
   199  				}
   200  			}
   201  		case blas.ConjTrans:
   202  			// Form  C = alpha * Aᵀ * Bᴴ + beta * C.
   203  			for i := 0; i < m; i++ {
   204  				for j := 0; j < n; j++ {
   205  					var tmp complex64
   206  					for l := 0; l < k; l++ {
   207  						tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l])
   208  					}
   209  					if beta == 0 {
   210  						c[i*ldc+j] = alpha * tmp
   211  					} else {
   212  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   213  					}
   214  				}
   215  			}
   216  		}
   217  	case blas.ConjTrans:
   218  		switch tB {
   219  		case blas.NoTrans:
   220  			// Form  C = alpha * Aᴴ * B + beta * C.
   221  			for i := 0; i < m; i++ {
   222  				for j := 0; j < n; j++ {
   223  					var tmp complex64
   224  					for l := 0; l < k; l++ {
   225  						tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j]
   226  					}
   227  					if beta == 0 {
   228  						c[i*ldc+j] = alpha * tmp
   229  					} else {
   230  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   231  					}
   232  				}
   233  			}
   234  		case blas.Trans:
   235  			// Form  C = alpha * Aᴴ * Bᵀ + beta * C.
   236  			for i := 0; i < m; i++ {
   237  				for j := 0; j < n; j++ {
   238  					var tmp complex64
   239  					for l := 0; l < k; l++ {
   240  						tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l]
   241  					}
   242  					if beta == 0 {
   243  						c[i*ldc+j] = alpha * tmp
   244  					} else {
   245  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   246  					}
   247  				}
   248  			}
   249  		case blas.ConjTrans:
   250  			// Form  C = alpha * Aᴴ * Bᴴ + beta * C.
   251  			for i := 0; i < m; i++ {
   252  				for j := 0; j < n; j++ {
   253  					var tmp complex64
   254  					for l := 0; l < k; l++ {
   255  						tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l])
   256  					}
   257  					if beta == 0 {
   258  						c[i*ldc+j] = alpha * tmp
   259  					} else {
   260  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   261  					}
   262  				}
   263  			}
   264  		}
   265  	}
   266  }
   267  
   268  // Chemm performs one of the matrix-matrix operations
   269  //
   270  //	C = alpha*A*B + beta*C  if side == blas.Left
   271  //	C = alpha*B*A + beta*C  if side == blas.Right
   272  //
   273  // where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B
   274  // and C are m×n matrices. The imaginary parts of the diagonal elements of A are
   275  // assumed to be zero.
   276  //
   277  // Complex64 implementations are autogenerated and not directly tested.
   278  func (Implementation) Chemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
   279  	na := m
   280  	if side == blas.Right {
   281  		na = n
   282  	}
   283  	switch {
   284  	case side != blas.Left && side != blas.Right:
   285  		panic(badSide)
   286  	case uplo != blas.Lower && uplo != blas.Upper:
   287  		panic(badUplo)
   288  	case m < 0:
   289  		panic(mLT0)
   290  	case n < 0:
   291  		panic(nLT0)
   292  	case lda < max(1, na):
   293  		panic(badLdA)
   294  	case ldb < max(1, n):
   295  		panic(badLdB)
   296  	case ldc < max(1, n):
   297  		panic(badLdC)
   298  	}
   299  
   300  	// Quick return if possible.
   301  	if m == 0 || n == 0 {
   302  		return
   303  	}
   304  
   305  	// For zero matrix size the following slice length checks are trivially satisfied.
   306  	if len(a) < lda*(na-1)+na {
   307  		panic(shortA)
   308  	}
   309  	if len(b) < ldb*(m-1)+n {
   310  		panic(shortB)
   311  	}
   312  	if len(c) < ldc*(m-1)+n {
   313  		panic(shortC)
   314  	}
   315  
   316  	// Quick return if possible.
   317  	if alpha == 0 && beta == 1 {
   318  		return
   319  	}
   320  
   321  	if alpha == 0 {
   322  		if beta == 0 {
   323  			for i := 0; i < m; i++ {
   324  				ci := c[i*ldc : i*ldc+n]
   325  				for j := range ci {
   326  					ci[j] = 0
   327  				}
   328  			}
   329  		} else {
   330  			for i := 0; i < m; i++ {
   331  				ci := c[i*ldc : i*ldc+n]
   332  				c64.ScalUnitary(beta, ci)
   333  			}
   334  		}
   335  		return
   336  	}
   337  
   338  	if side == blas.Left {
   339  		// Form  C = alpha*A*B + beta*C.
   340  		for i := 0; i < m; i++ {
   341  			atmp := alpha * complex(real(a[i*lda+i]), 0)
   342  			bi := b[i*ldb : i*ldb+n]
   343  			ci := c[i*ldc : i*ldc+n]
   344  			if beta == 0 {
   345  				for j, bij := range bi {
   346  					ci[j] = atmp * bij
   347  				}
   348  			} else {
   349  				for j, bij := range bi {
   350  					ci[j] = atmp*bij + beta*ci[j]
   351  				}
   352  			}
   353  			if uplo == blas.Upper {
   354  				for k := 0; k < i; k++ {
   355  					atmp = alpha * cmplx.Conj(a[k*lda+i])
   356  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   357  				}
   358  				for k := i + 1; k < m; k++ {
   359  					atmp = alpha * a[i*lda+k]
   360  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   361  				}
   362  			} else {
   363  				for k := 0; k < i; k++ {
   364  					atmp = alpha * a[i*lda+k]
   365  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   366  				}
   367  				for k := i + 1; k < m; k++ {
   368  					atmp = alpha * cmplx.Conj(a[k*lda+i])
   369  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   370  				}
   371  			}
   372  		}
   373  	} else {
   374  		// Form  C = alpha*B*A + beta*C.
   375  		if uplo == blas.Upper {
   376  			for i := 0; i < m; i++ {
   377  				for j := n - 1; j >= 0; j-- {
   378  					abij := alpha * b[i*ldb+j]
   379  					aj := a[j*lda+j+1 : j*lda+n]
   380  					bi := b[i*ldb+j+1 : i*ldb+n]
   381  					ci := c[i*ldc+j+1 : i*ldc+n]
   382  					var tmp complex64
   383  					for k, ajk := range aj {
   384  						ci[k] += abij * ajk
   385  						tmp += bi[k] * cmplx.Conj(ajk)
   386  					}
   387  					ajj := complex(real(a[j*lda+j]), 0)
   388  					if beta == 0 {
   389  						c[i*ldc+j] = abij*ajj + alpha*tmp
   390  					} else {
   391  						c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j]
   392  					}
   393  				}
   394  			}
   395  		} else {
   396  			for i := 0; i < m; i++ {
   397  				for j := 0; j < n; j++ {
   398  					abij := alpha * b[i*ldb+j]
   399  					aj := a[j*lda : j*lda+j]
   400  					bi := b[i*ldb : i*ldb+j]
   401  					ci := c[i*ldc : i*ldc+j]
   402  					var tmp complex64
   403  					for k, ajk := range aj {
   404  						ci[k] += abij * ajk
   405  						tmp += bi[k] * cmplx.Conj(ajk)
   406  					}
   407  					ajj := complex(real(a[j*lda+j]), 0)
   408  					if beta == 0 {
   409  						c[i*ldc+j] = abij*ajj + alpha*tmp
   410  					} else {
   411  						c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j]
   412  					}
   413  				}
   414  			}
   415  		}
   416  	}
   417  }
   418  
   419  // Cherk performs one of the hermitian rank-k operations
   420  //
   421  //	C = alpha*A*Aᴴ + beta*C  if trans == blas.NoTrans
   422  //	C = alpha*Aᴴ*A + beta*C  if trans == blas.ConjTrans
   423  //
   424  // where alpha and beta are real scalars, C is an n×n hermitian matrix and A is
   425  // an n×k matrix in the first case and a k×n matrix in the second case.
   426  //
   427  // The imaginary parts of the diagonal elements of C are assumed to be zero, and
   428  // on return they will be set to zero.
   429  //
   430  // Complex64 implementations are autogenerated and not directly tested.
   431  func (Implementation) Cherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) {
   432  	var rowA, colA int
   433  	switch trans {
   434  	default:
   435  		panic(badTranspose)
   436  	case blas.NoTrans:
   437  		rowA, colA = n, k
   438  	case blas.ConjTrans:
   439  		rowA, colA = k, n
   440  	}
   441  	switch {
   442  	case uplo != blas.Lower && uplo != blas.Upper:
   443  		panic(badUplo)
   444  	case n < 0:
   445  		panic(nLT0)
   446  	case k < 0:
   447  		panic(kLT0)
   448  	case lda < max(1, colA):
   449  		panic(badLdA)
   450  	case ldc < max(1, n):
   451  		panic(badLdC)
   452  	}
   453  
   454  	// Quick return if possible.
   455  	if n == 0 {
   456  		return
   457  	}
   458  
   459  	// For zero matrix size the following slice length checks are trivially satisfied.
   460  	if len(a) < (rowA-1)*lda+colA {
   461  		panic(shortA)
   462  	}
   463  	if len(c) < (n-1)*ldc+n {
   464  		panic(shortC)
   465  	}
   466  
   467  	// Quick return if possible.
   468  	if (alpha == 0 || k == 0) && beta == 1 {
   469  		return
   470  	}
   471  
   472  	if alpha == 0 {
   473  		if uplo == blas.Upper {
   474  			if beta == 0 {
   475  				for i := 0; i < n; i++ {
   476  					ci := c[i*ldc+i : i*ldc+n]
   477  					for j := range ci {
   478  						ci[j] = 0
   479  					}
   480  				}
   481  			} else {
   482  				for i := 0; i < n; i++ {
   483  					ci := c[i*ldc+i : i*ldc+n]
   484  					ci[0] = complex(beta*real(ci[0]), 0)
   485  					if i != n-1 {
   486  						c64.SscalUnitary(beta, ci[1:])
   487  					}
   488  				}
   489  			}
   490  		} else {
   491  			if beta == 0 {
   492  				for i := 0; i < n; i++ {
   493  					ci := c[i*ldc : i*ldc+i+1]
   494  					for j := range ci {
   495  						ci[j] = 0
   496  					}
   497  				}
   498  			} else {
   499  				for i := 0; i < n; i++ {
   500  					ci := c[i*ldc : i*ldc+i+1]
   501  					if i != 0 {
   502  						c64.SscalUnitary(beta, ci[:i])
   503  					}
   504  					ci[i] = complex(beta*real(ci[i]), 0)
   505  				}
   506  			}
   507  		}
   508  		return
   509  	}
   510  
   511  	calpha := complex(alpha, 0)
   512  	if trans == blas.NoTrans {
   513  		// Form  C = alpha*A*Aᴴ + beta*C.
   514  		cbeta := complex(beta, 0)
   515  		if uplo == blas.Upper {
   516  			for i := 0; i < n; i++ {
   517  				ci := c[i*ldc+i : i*ldc+n]
   518  				ai := a[i*lda : i*lda+k]
   519  				switch {
   520  				case beta == 0:
   521  					// Handle the i-th diagonal element of C.
   522  					ci[0] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0)
   523  					// Handle the remaining elements on the i-th row of C.
   524  					for jc := range ci[1:] {
   525  						j := i + 1 + jc
   526  						ci[jc+1] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai)
   527  					}
   528  				case beta != 1:
   529  					cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[0]
   530  					ci[0] = complex(real(cii), 0)
   531  					for jc, cij := range ci[1:] {
   532  						j := i + 1 + jc
   533  						ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij
   534  					}
   535  				default:
   536  					cii := calpha*c64.DotcUnitary(ai, ai) + ci[0]
   537  					ci[0] = complex(real(cii), 0)
   538  					for jc, cij := range ci[1:] {
   539  						j := i + 1 + jc
   540  						ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij
   541  					}
   542  				}
   543  			}
   544  		} else {
   545  			for i := 0; i < n; i++ {
   546  				ci := c[i*ldc : i*ldc+i+1]
   547  				ai := a[i*lda : i*lda+k]
   548  				switch {
   549  				case beta == 0:
   550  					// Handle the first i-1 elements on the i-th row of C.
   551  					for j := range ci[:i] {
   552  						ci[j] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai)
   553  					}
   554  					// Handle the i-th diagonal element of C.
   555  					ci[i] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0)
   556  				case beta != 1:
   557  					for j, cij := range ci[:i] {
   558  						ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij
   559  					}
   560  					cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[i]
   561  					ci[i] = complex(real(cii), 0)
   562  				default:
   563  					for j, cij := range ci[:i] {
   564  						ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij
   565  					}
   566  					cii := calpha*c64.DotcUnitary(ai, ai) + ci[i]
   567  					ci[i] = complex(real(cii), 0)
   568  				}
   569  			}
   570  		}
   571  	} else {
   572  		// Form  C = alpha*Aᴴ*A + beta*C.
   573  		if uplo == blas.Upper {
   574  			for i := 0; i < n; i++ {
   575  				ci := c[i*ldc+i : i*ldc+n]
   576  				switch {
   577  				case beta == 0:
   578  					for jc := range ci {
   579  						ci[jc] = 0
   580  					}
   581  				case beta != 1:
   582  					c64.SscalUnitary(beta, ci)
   583  					ci[0] = complex(real(ci[0]), 0)
   584  				default:
   585  					ci[0] = complex(real(ci[0]), 0)
   586  				}
   587  				for j := 0; j < k; j++ {
   588  					aji := cmplx.Conj(a[j*lda+i])
   589  					if aji != 0 {
   590  						c64.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci)
   591  					}
   592  				}
   593  				c[i*ldc+i] = complex(real(c[i*ldc+i]), 0)
   594  			}
   595  		} else {
   596  			for i := 0; i < n; i++ {
   597  				ci := c[i*ldc : i*ldc+i+1]
   598  				switch {
   599  				case beta == 0:
   600  					for j := range ci {
   601  						ci[j] = 0
   602  					}
   603  				case beta != 1:
   604  					c64.SscalUnitary(beta, ci)
   605  					ci[i] = complex(real(ci[i]), 0)
   606  				default:
   607  					ci[i] = complex(real(ci[i]), 0)
   608  				}
   609  				for j := 0; j < k; j++ {
   610  					aji := cmplx.Conj(a[j*lda+i])
   611  					if aji != 0 {
   612  						c64.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci)
   613  					}
   614  				}
   615  				c[i*ldc+i] = complex(real(c[i*ldc+i]), 0)
   616  			}
   617  		}
   618  	}
   619  }
   620  
   621  // Cher2k performs one of the hermitian rank-2k operations
   622  //
   623  //	C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C  if trans == blas.NoTrans
   624  //	C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C  if trans == blas.ConjTrans
   625  //
   626  // where alpha and beta are scalars with beta real, C is an n×n hermitian matrix
   627  // and A and B are n×k matrices in the first case and k×n matrices in the second case.
   628  //
   629  // The imaginary parts of the diagonal elements of C are assumed to be zero, and
   630  // on return they will be set to zero.
   631  //
   632  // Complex64 implementations are autogenerated and not directly tested.
   633  func (Implementation) Cher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) {
   634  	var row, col int
   635  	switch trans {
   636  	default:
   637  		panic(badTranspose)
   638  	case blas.NoTrans:
   639  		row, col = n, k
   640  	case blas.ConjTrans:
   641  		row, col = k, n
   642  	}
   643  	switch {
   644  	case uplo != blas.Lower && uplo != blas.Upper:
   645  		panic(badUplo)
   646  	case n < 0:
   647  		panic(nLT0)
   648  	case k < 0:
   649  		panic(kLT0)
   650  	case lda < max(1, col):
   651  		panic(badLdA)
   652  	case ldb < max(1, col):
   653  		panic(badLdB)
   654  	case ldc < max(1, n):
   655  		panic(badLdC)
   656  	}
   657  
   658  	// Quick return if possible.
   659  	if n == 0 {
   660  		return
   661  	}
   662  
   663  	// For zero matrix size the following slice length checks are trivially satisfied.
   664  	if len(a) < (row-1)*lda+col {
   665  		panic(shortA)
   666  	}
   667  	if len(b) < (row-1)*ldb+col {
   668  		panic(shortB)
   669  	}
   670  	if len(c) < (n-1)*ldc+n {
   671  		panic(shortC)
   672  	}
   673  
   674  	// Quick return if possible.
   675  	if (alpha == 0 || k == 0) && beta == 1 {
   676  		return
   677  	}
   678  
   679  	if alpha == 0 {
   680  		if uplo == blas.Upper {
   681  			if beta == 0 {
   682  				for i := 0; i < n; i++ {
   683  					ci := c[i*ldc+i : i*ldc+n]
   684  					for j := range ci {
   685  						ci[j] = 0
   686  					}
   687  				}
   688  			} else {
   689  				for i := 0; i < n; i++ {
   690  					ci := c[i*ldc+i : i*ldc+n]
   691  					ci[0] = complex(beta*real(ci[0]), 0)
   692  					if i != n-1 {
   693  						c64.SscalUnitary(beta, ci[1:])
   694  					}
   695  				}
   696  			}
   697  		} else {
   698  			if beta == 0 {
   699  				for i := 0; i < n; i++ {
   700  					ci := c[i*ldc : i*ldc+i+1]
   701  					for j := range ci {
   702  						ci[j] = 0
   703  					}
   704  				}
   705  			} else {
   706  				for i := 0; i < n; i++ {
   707  					ci := c[i*ldc : i*ldc+i+1]
   708  					if i != 0 {
   709  						c64.SscalUnitary(beta, ci[:i])
   710  					}
   711  					ci[i] = complex(beta*real(ci[i]), 0)
   712  				}
   713  			}
   714  		}
   715  		return
   716  	}
   717  
   718  	conjalpha := cmplx.Conj(alpha)
   719  	cbeta := complex(beta, 0)
   720  	if trans == blas.NoTrans {
   721  		// Form  C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C.
   722  		if uplo == blas.Upper {
   723  			for i := 0; i < n; i++ {
   724  				ci := c[i*ldc+i+1 : i*ldc+n]
   725  				ai := a[i*lda : i*lda+k]
   726  				bi := b[i*ldb : i*ldb+k]
   727  				if beta == 0 {
   728  					cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi)
   729  					c[i*ldc+i] = complex(real(cii), 0)
   730  					for jc := range ci {
   731  						j := i + 1 + jc
   732  						ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi)
   733  					}
   734  				} else {
   735  					cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i]
   736  					c[i*ldc+i] = complex(real(cii), 0)
   737  					for jc, cij := range ci {
   738  						j := i + 1 + jc
   739  						ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij
   740  					}
   741  				}
   742  			}
   743  		} else {
   744  			for i := 0; i < n; i++ {
   745  				ci := c[i*ldc : i*ldc+i]
   746  				ai := a[i*lda : i*lda+k]
   747  				bi := b[i*ldb : i*ldb+k]
   748  				if beta == 0 {
   749  					for j := range ci {
   750  						ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi)
   751  					}
   752  					cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi)
   753  					c[i*ldc+i] = complex(real(cii), 0)
   754  				} else {
   755  					for j, cij := range ci {
   756  						ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij
   757  					}
   758  					cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i]
   759  					c[i*ldc+i] = complex(real(cii), 0)
   760  				}
   761  			}
   762  		}
   763  	} else {
   764  		// Form  C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C.
   765  		if uplo == blas.Upper {
   766  			for i := 0; i < n; i++ {
   767  				ci := c[i*ldc+i : i*ldc+n]
   768  				switch {
   769  				case beta == 0:
   770  					for jc := range ci {
   771  						ci[jc] = 0
   772  					}
   773  				case beta != 1:
   774  					c64.SscalUnitary(beta, ci)
   775  					ci[0] = complex(real(ci[0]), 0)
   776  				default:
   777  					ci[0] = complex(real(ci[0]), 0)
   778  				}
   779  				for j := 0; j < k; j++ {
   780  					aji := a[j*lda+i]
   781  					bji := b[j*ldb+i]
   782  					if aji != 0 {
   783  						c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci)
   784  					}
   785  					if bji != 0 {
   786  						c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci)
   787  					}
   788  				}
   789  				ci[0] = complex(real(ci[0]), 0)
   790  			}
   791  		} else {
   792  			for i := 0; i < n; i++ {
   793  				ci := c[i*ldc : i*ldc+i+1]
   794  				switch {
   795  				case beta == 0:
   796  					for j := range ci {
   797  						ci[j] = 0
   798  					}
   799  				case beta != 1:
   800  					c64.SscalUnitary(beta, ci)
   801  					ci[i] = complex(real(ci[i]), 0)
   802  				default:
   803  					ci[i] = complex(real(ci[i]), 0)
   804  				}
   805  				for j := 0; j < k; j++ {
   806  					aji := a[j*lda+i]
   807  					bji := b[j*ldb+i]
   808  					if aji != 0 {
   809  						c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci)
   810  					}
   811  					if bji != 0 {
   812  						c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci)
   813  					}
   814  				}
   815  				ci[i] = complex(real(ci[i]), 0)
   816  			}
   817  		}
   818  	}
   819  }
   820  
   821  // Csymm performs one of the matrix-matrix operations
   822  //
   823  //	C = alpha*A*B + beta*C  if side == blas.Left
   824  //	C = alpha*B*A + beta*C  if side == blas.Right
   825  //
   826  // where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B
   827  // and C are m×n matrices.
   828  //
   829  // Complex64 implementations are autogenerated and not directly tested.
   830  func (Implementation) Csymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
   831  	na := m
   832  	if side == blas.Right {
   833  		na = n
   834  	}
   835  	switch {
   836  	case side != blas.Left && side != blas.Right:
   837  		panic(badSide)
   838  	case uplo != blas.Lower && uplo != blas.Upper:
   839  		panic(badUplo)
   840  	case m < 0:
   841  		panic(mLT0)
   842  	case n < 0:
   843  		panic(nLT0)
   844  	case lda < max(1, na):
   845  		panic(badLdA)
   846  	case ldb < max(1, n):
   847  		panic(badLdB)
   848  	case ldc < max(1, n):
   849  		panic(badLdC)
   850  	}
   851  
   852  	// Quick return if possible.
   853  	if m == 0 || n == 0 {
   854  		return
   855  	}
   856  
   857  	// For zero matrix size the following slice length checks are trivially satisfied.
   858  	if len(a) < lda*(na-1)+na {
   859  		panic(shortA)
   860  	}
   861  	if len(b) < ldb*(m-1)+n {
   862  		panic(shortB)
   863  	}
   864  	if len(c) < ldc*(m-1)+n {
   865  		panic(shortC)
   866  	}
   867  
   868  	// Quick return if possible.
   869  	if alpha == 0 && beta == 1 {
   870  		return
   871  	}
   872  
   873  	if alpha == 0 {
   874  		if beta == 0 {
   875  			for i := 0; i < m; i++ {
   876  				ci := c[i*ldc : i*ldc+n]
   877  				for j := range ci {
   878  					ci[j] = 0
   879  				}
   880  			}
   881  		} else {
   882  			for i := 0; i < m; i++ {
   883  				ci := c[i*ldc : i*ldc+n]
   884  				c64.ScalUnitary(beta, ci)
   885  			}
   886  		}
   887  		return
   888  	}
   889  
   890  	if side == blas.Left {
   891  		// Form  C = alpha*A*B + beta*C.
   892  		for i := 0; i < m; i++ {
   893  			atmp := alpha * a[i*lda+i]
   894  			bi := b[i*ldb : i*ldb+n]
   895  			ci := c[i*ldc : i*ldc+n]
   896  			if beta == 0 {
   897  				for j, bij := range bi {
   898  					ci[j] = atmp * bij
   899  				}
   900  			} else {
   901  				for j, bij := range bi {
   902  					ci[j] = atmp*bij + beta*ci[j]
   903  				}
   904  			}
   905  			if uplo == blas.Upper {
   906  				for k := 0; k < i; k++ {
   907  					atmp = alpha * a[k*lda+i]
   908  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   909  				}
   910  				for k := i + 1; k < m; k++ {
   911  					atmp = alpha * a[i*lda+k]
   912  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   913  				}
   914  			} else {
   915  				for k := 0; k < i; k++ {
   916  					atmp = alpha * a[i*lda+k]
   917  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   918  				}
   919  				for k := i + 1; k < m; k++ {
   920  					atmp = alpha * a[k*lda+i]
   921  					c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   922  				}
   923  			}
   924  		}
   925  	} else {
   926  		// Form  C = alpha*B*A + beta*C.
   927  		if uplo == blas.Upper {
   928  			for i := 0; i < m; i++ {
   929  				for j := n - 1; j >= 0; j-- {
   930  					abij := alpha * b[i*ldb+j]
   931  					aj := a[j*lda+j+1 : j*lda+n]
   932  					bi := b[i*ldb+j+1 : i*ldb+n]
   933  					ci := c[i*ldc+j+1 : i*ldc+n]
   934  					var tmp complex64
   935  					for k, ajk := range aj {
   936  						ci[k] += abij * ajk
   937  						tmp += bi[k] * ajk
   938  					}
   939  					if beta == 0 {
   940  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp
   941  					} else {
   942  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j]
   943  					}
   944  				}
   945  			}
   946  		} else {
   947  			for i := 0; i < m; i++ {
   948  				for j := 0; j < n; j++ {
   949  					abij := alpha * b[i*ldb+j]
   950  					aj := a[j*lda : j*lda+j]
   951  					bi := b[i*ldb : i*ldb+j]
   952  					ci := c[i*ldc : i*ldc+j]
   953  					var tmp complex64
   954  					for k, ajk := range aj {
   955  						ci[k] += abij * ajk
   956  						tmp += bi[k] * ajk
   957  					}
   958  					if beta == 0 {
   959  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp
   960  					} else {
   961  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j]
   962  					}
   963  				}
   964  			}
   965  		}
   966  	}
   967  }
   968  
   969  // Csyrk performs one of the symmetric rank-k operations
   970  //
   971  //	C = alpha*A*Aᵀ + beta*C  if trans == blas.NoTrans
   972  //	C = alpha*Aᵀ*A + beta*C  if trans == blas.Trans
   973  //
   974  // where alpha and beta are scalars, C is an n×n symmetric matrix and A is
   975  // an n×k matrix in the first case and a k×n matrix in the second case.
   976  //
   977  // Complex64 implementations are autogenerated and not directly tested.
   978  func (Implementation) Csyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) {
   979  	var rowA, colA int
   980  	switch trans {
   981  	default:
   982  		panic(badTranspose)
   983  	case blas.NoTrans:
   984  		rowA, colA = n, k
   985  	case blas.Trans:
   986  		rowA, colA = k, n
   987  	}
   988  	switch {
   989  	case uplo != blas.Lower && uplo != blas.Upper:
   990  		panic(badUplo)
   991  	case n < 0:
   992  		panic(nLT0)
   993  	case k < 0:
   994  		panic(kLT0)
   995  	case lda < max(1, colA):
   996  		panic(badLdA)
   997  	case ldc < max(1, n):
   998  		panic(badLdC)
   999  	}
  1000  
  1001  	// Quick return if possible.
  1002  	if n == 0 {
  1003  		return
  1004  	}
  1005  
  1006  	// For zero matrix size the following slice length checks are trivially satisfied.
  1007  	if len(a) < (rowA-1)*lda+colA {
  1008  		panic(shortA)
  1009  	}
  1010  	if len(c) < (n-1)*ldc+n {
  1011  		panic(shortC)
  1012  	}
  1013  
  1014  	// Quick return if possible.
  1015  	if (alpha == 0 || k == 0) && beta == 1 {
  1016  		return
  1017  	}
  1018  
  1019  	if alpha == 0 {
  1020  		if uplo == blas.Upper {
  1021  			if beta == 0 {
  1022  				for i := 0; i < n; i++ {
  1023  					ci := c[i*ldc+i : i*ldc+n]
  1024  					for j := range ci {
  1025  						ci[j] = 0
  1026  					}
  1027  				}
  1028  			} else {
  1029  				for i := 0; i < n; i++ {
  1030  					ci := c[i*ldc+i : i*ldc+n]
  1031  					c64.ScalUnitary(beta, ci)
  1032  				}
  1033  			}
  1034  		} else {
  1035  			if beta == 0 {
  1036  				for i := 0; i < n; i++ {
  1037  					ci := c[i*ldc : i*ldc+i+1]
  1038  					for j := range ci {
  1039  						ci[j] = 0
  1040  					}
  1041  				}
  1042  			} else {
  1043  				for i := 0; i < n; i++ {
  1044  					ci := c[i*ldc : i*ldc+i+1]
  1045  					c64.ScalUnitary(beta, ci)
  1046  				}
  1047  			}
  1048  		}
  1049  		return
  1050  	}
  1051  
  1052  	if trans == blas.NoTrans {
  1053  		// Form  C = alpha*A*Aᵀ + beta*C.
  1054  		if uplo == blas.Upper {
  1055  			for i := 0; i < n; i++ {
  1056  				ci := c[i*ldc+i : i*ldc+n]
  1057  				ai := a[i*lda : i*lda+k]
  1058  				if beta == 0 {
  1059  					for jc := range ci {
  1060  						j := i + jc
  1061  						ci[jc] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k])
  1062  					}
  1063  				} else {
  1064  					for jc, cij := range ci {
  1065  						j := i + jc
  1066  						ci[jc] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k])
  1067  					}
  1068  				}
  1069  			}
  1070  		} else {
  1071  			for i := 0; i < n; i++ {
  1072  				ci := c[i*ldc : i*ldc+i+1]
  1073  				ai := a[i*lda : i*lda+k]
  1074  				if beta == 0 {
  1075  					for j := range ci {
  1076  						ci[j] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k])
  1077  					}
  1078  				} else {
  1079  					for j, cij := range ci {
  1080  						ci[j] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k])
  1081  					}
  1082  				}
  1083  			}
  1084  		}
  1085  	} else {
  1086  		// Form  C = alpha*Aᵀ*A + beta*C.
  1087  		if uplo == blas.Upper {
  1088  			for i := 0; i < n; i++ {
  1089  				ci := c[i*ldc+i : i*ldc+n]
  1090  				switch {
  1091  				case beta == 0:
  1092  					for jc := range ci {
  1093  						ci[jc] = 0
  1094  					}
  1095  				case beta != 1:
  1096  					for jc := range ci {
  1097  						ci[jc] *= beta
  1098  					}
  1099  				}
  1100  				for j := 0; j < k; j++ {
  1101  					aji := a[j*lda+i]
  1102  					if aji != 0 {
  1103  						c64.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci)
  1104  					}
  1105  				}
  1106  			}
  1107  		} else {
  1108  			for i := 0; i < n; i++ {
  1109  				ci := c[i*ldc : i*ldc+i+1]
  1110  				switch {
  1111  				case beta == 0:
  1112  					for j := range ci {
  1113  						ci[j] = 0
  1114  					}
  1115  				case beta != 1:
  1116  					for j := range ci {
  1117  						ci[j] *= beta
  1118  					}
  1119  				}
  1120  				for j := 0; j < k; j++ {
  1121  					aji := a[j*lda+i]
  1122  					if aji != 0 {
  1123  						c64.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci)
  1124  					}
  1125  				}
  1126  			}
  1127  		}
  1128  	}
  1129  }
  1130  
  1131  // Csyr2k performs one of the symmetric rank-2k operations
  1132  //
  1133  //	C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C  if trans == blas.NoTrans
  1134  //	C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C  if trans == blas.Trans
  1135  //
  1136  // where alpha and beta are scalars, C is an n×n symmetric matrix and A and B
  1137  // are n×k matrices in the first case and k×n matrices in the second case.
  1138  //
  1139  // Complex64 implementations are autogenerated and not directly tested.
  1140  func (Implementation) Csyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) {
  1141  	var row, col int
  1142  	switch trans {
  1143  	default:
  1144  		panic(badTranspose)
  1145  	case blas.NoTrans:
  1146  		row, col = n, k
  1147  	case blas.Trans:
  1148  		row, col = k, n
  1149  	}
  1150  	switch {
  1151  	case uplo != blas.Lower && uplo != blas.Upper:
  1152  		panic(badUplo)
  1153  	case n < 0:
  1154  		panic(nLT0)
  1155  	case k < 0:
  1156  		panic(kLT0)
  1157  	case lda < max(1, col):
  1158  		panic(badLdA)
  1159  	case ldb < max(1, col):
  1160  		panic(badLdB)
  1161  	case ldc < max(1, n):
  1162  		panic(badLdC)
  1163  	}
  1164  
  1165  	// Quick return if possible.
  1166  	if n == 0 {
  1167  		return
  1168  	}
  1169  
  1170  	// For zero matrix size the following slice length checks are trivially satisfied.
  1171  	if len(a) < (row-1)*lda+col {
  1172  		panic(shortA)
  1173  	}
  1174  	if len(b) < (row-1)*ldb+col {
  1175  		panic(shortB)
  1176  	}
  1177  	if len(c) < (n-1)*ldc+n {
  1178  		panic(shortC)
  1179  	}
  1180  
  1181  	// Quick return if possible.
  1182  	if (alpha == 0 || k == 0) && beta == 1 {
  1183  		return
  1184  	}
  1185  
  1186  	if alpha == 0 {
  1187  		if uplo == blas.Upper {
  1188  			if beta == 0 {
  1189  				for i := 0; i < n; i++ {
  1190  					ci := c[i*ldc+i : i*ldc+n]
  1191  					for j := range ci {
  1192  						ci[j] = 0
  1193  					}
  1194  				}
  1195  			} else {
  1196  				for i := 0; i < n; i++ {
  1197  					ci := c[i*ldc+i : i*ldc+n]
  1198  					c64.ScalUnitary(beta, ci)
  1199  				}
  1200  			}
  1201  		} else {
  1202  			if beta == 0 {
  1203  				for i := 0; i < n; i++ {
  1204  					ci := c[i*ldc : i*ldc+i+1]
  1205  					for j := range ci {
  1206  						ci[j] = 0
  1207  					}
  1208  				}
  1209  			} else {
  1210  				for i := 0; i < n; i++ {
  1211  					ci := c[i*ldc : i*ldc+i+1]
  1212  					c64.ScalUnitary(beta, ci)
  1213  				}
  1214  			}
  1215  		}
  1216  		return
  1217  	}
  1218  
  1219  	if trans == blas.NoTrans {
  1220  		// Form  C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C.
  1221  		if uplo == blas.Upper {
  1222  			for i := 0; i < n; i++ {
  1223  				ci := c[i*ldc+i : i*ldc+n]
  1224  				ai := a[i*lda : i*lda+k]
  1225  				bi := b[i*ldb : i*ldb+k]
  1226  				if beta == 0 {
  1227  					for jc := range ci {
  1228  						j := i + jc
  1229  						ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k])
  1230  					}
  1231  				} else {
  1232  					for jc, cij := range ci {
  1233  						j := i + jc
  1234  						ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij
  1235  					}
  1236  				}
  1237  			}
  1238  		} else {
  1239  			for i := 0; i < n; i++ {
  1240  				ci := c[i*ldc : i*ldc+i+1]
  1241  				ai := a[i*lda : i*lda+k]
  1242  				bi := b[i*ldb : i*ldb+k]
  1243  				if beta == 0 {
  1244  					for j := range ci {
  1245  						ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k])
  1246  					}
  1247  				} else {
  1248  					for j, cij := range ci {
  1249  						ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij
  1250  					}
  1251  				}
  1252  			}
  1253  		}
  1254  	} else {
  1255  		// Form  C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C.
  1256  		if uplo == blas.Upper {
  1257  			for i := 0; i < n; i++ {
  1258  				ci := c[i*ldc+i : i*ldc+n]
  1259  				switch {
  1260  				case beta == 0:
  1261  					for jc := range ci {
  1262  						ci[jc] = 0
  1263  					}
  1264  				case beta != 1:
  1265  					for jc := range ci {
  1266  						ci[jc] *= beta
  1267  					}
  1268  				}
  1269  				for j := 0; j < k; j++ {
  1270  					aji := a[j*lda+i]
  1271  					bji := b[j*ldb+i]
  1272  					if aji != 0 {
  1273  						c64.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci)
  1274  					}
  1275  					if bji != 0 {
  1276  						c64.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci)
  1277  					}
  1278  				}
  1279  			}
  1280  		} else {
  1281  			for i := 0; i < n; i++ {
  1282  				ci := c[i*ldc : i*ldc+i+1]
  1283  				switch {
  1284  				case beta == 0:
  1285  					for j := range ci {
  1286  						ci[j] = 0
  1287  					}
  1288  				case beta != 1:
  1289  					for j := range ci {
  1290  						ci[j] *= beta
  1291  					}
  1292  				}
  1293  				for j := 0; j < k; j++ {
  1294  					aji := a[j*lda+i]
  1295  					bji := b[j*ldb+i]
  1296  					if aji != 0 {
  1297  						c64.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci)
  1298  					}
  1299  					if bji != 0 {
  1300  						c64.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci)
  1301  					}
  1302  				}
  1303  			}
  1304  		}
  1305  	}
  1306  }
  1307  
  1308  // Ctrmm performs one of the matrix-matrix operations
  1309  //
  1310  //	B = alpha * op(A) * B  if side == blas.Left,
  1311  //	B = alpha * B * op(A)  if side == blas.Right,
  1312  //
  1313  // where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit,
  1314  // upper or lower triangular matrix and op(A) is one of
  1315  //
  1316  //	op(A) = A   if trans == blas.NoTrans,
  1317  //	op(A) = Aᵀ  if trans == blas.Trans,
  1318  //	op(A) = Aᴴ  if trans == blas.ConjTrans.
  1319  //
  1320  // Complex64 implementations are autogenerated and not directly tested.
  1321  func (Implementation) Ctrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
  1322  	na := m
  1323  	if side == blas.Right {
  1324  		na = n
  1325  	}
  1326  	switch {
  1327  	case side != blas.Left && side != blas.Right:
  1328  		panic(badSide)
  1329  	case uplo != blas.Lower && uplo != blas.Upper:
  1330  		panic(badUplo)
  1331  	case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans:
  1332  		panic(badTranspose)
  1333  	case diag != blas.Unit && diag != blas.NonUnit:
  1334  		panic(badDiag)
  1335  	case m < 0:
  1336  		panic(mLT0)
  1337  	case n < 0:
  1338  		panic(nLT0)
  1339  	case lda < max(1, na):
  1340  		panic(badLdA)
  1341  	case ldb < max(1, n):
  1342  		panic(badLdB)
  1343  	}
  1344  
  1345  	// Quick return if possible.
  1346  	if m == 0 || n == 0 {
  1347  		return
  1348  	}
  1349  
  1350  	// For zero matrix size the following slice length checks are trivially satisfied.
  1351  	if len(a) < (na-1)*lda+na {
  1352  		panic(shortA)
  1353  	}
  1354  	if len(b) < (m-1)*ldb+n {
  1355  		panic(shortB)
  1356  	}
  1357  
  1358  	// Quick return if possible.
  1359  	if alpha == 0 {
  1360  		for i := 0; i < m; i++ {
  1361  			bi := b[i*ldb : i*ldb+n]
  1362  			for j := range bi {
  1363  				bi[j] = 0
  1364  			}
  1365  		}
  1366  		return
  1367  	}
  1368  
  1369  	noConj := trans != blas.ConjTrans
  1370  	noUnit := diag == blas.NonUnit
  1371  	if side == blas.Left {
  1372  		if trans == blas.NoTrans {
  1373  			// Form B = alpha*A*B.
  1374  			if uplo == blas.Upper {
  1375  				for i := 0; i < m; i++ {
  1376  					aii := alpha
  1377  					if noUnit {
  1378  						aii *= a[i*lda+i]
  1379  					}
  1380  					bi := b[i*ldb : i*ldb+n]
  1381  					for j := range bi {
  1382  						bi[j] *= aii
  1383  					}
  1384  					for ja, aij := range a[i*lda+i+1 : i*lda+m] {
  1385  						j := ja + i + 1
  1386  						if aij != 0 {
  1387  							c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi)
  1388  						}
  1389  					}
  1390  				}
  1391  			} else {
  1392  				for i := m - 1; i >= 0; i-- {
  1393  					aii := alpha
  1394  					if noUnit {
  1395  						aii *= a[i*lda+i]
  1396  					}
  1397  					bi := b[i*ldb : i*ldb+n]
  1398  					for j := range bi {
  1399  						bi[j] *= aii
  1400  					}
  1401  					for j, aij := range a[i*lda : i*lda+i] {
  1402  						if aij != 0 {
  1403  							c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi)
  1404  						}
  1405  					}
  1406  				}
  1407  			}
  1408  		} else {
  1409  			// Form B = alpha*Aᵀ*B  or  B = alpha*Aᴴ*B.
  1410  			if uplo == blas.Upper {
  1411  				for k := m - 1; k >= 0; k-- {
  1412  					bk := b[k*ldb : k*ldb+n]
  1413  					for ja, ajk := range a[k*lda+k+1 : k*lda+m] {
  1414  						if ajk == 0 {
  1415  							continue
  1416  						}
  1417  						j := k + 1 + ja
  1418  						if noConj {
  1419  							c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n])
  1420  						} else {
  1421  							c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n])
  1422  						}
  1423  					}
  1424  					akk := alpha
  1425  					if noUnit {
  1426  						if noConj {
  1427  							akk *= a[k*lda+k]
  1428  						} else {
  1429  							akk *= cmplx.Conj(a[k*lda+k])
  1430  						}
  1431  					}
  1432  					if akk != 1 {
  1433  						c64.ScalUnitary(akk, bk)
  1434  					}
  1435  				}
  1436  			} else {
  1437  				for k := 0; k < m; k++ {
  1438  					bk := b[k*ldb : k*ldb+n]
  1439  					for j, ajk := range a[k*lda : k*lda+k] {
  1440  						if ajk == 0 {
  1441  							continue
  1442  						}
  1443  						if noConj {
  1444  							c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n])
  1445  						} else {
  1446  							c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n])
  1447  						}
  1448  					}
  1449  					akk := alpha
  1450  					if noUnit {
  1451  						if noConj {
  1452  							akk *= a[k*lda+k]
  1453  						} else {
  1454  							akk *= cmplx.Conj(a[k*lda+k])
  1455  						}
  1456  					}
  1457  					if akk != 1 {
  1458  						c64.ScalUnitary(akk, bk)
  1459  					}
  1460  				}
  1461  			}
  1462  		}
  1463  	} else {
  1464  		if trans == blas.NoTrans {
  1465  			// Form B = alpha*B*A.
  1466  			if uplo == blas.Upper {
  1467  				for i := 0; i < m; i++ {
  1468  					bi := b[i*ldb : i*ldb+n]
  1469  					for k := n - 1; k >= 0; k-- {
  1470  						abik := alpha * bi[k]
  1471  						if abik == 0 {
  1472  							continue
  1473  						}
  1474  						bi[k] = abik
  1475  						if noUnit {
  1476  							bi[k] *= a[k*lda+k]
  1477  						}
  1478  						c64.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:])
  1479  					}
  1480  				}
  1481  			} else {
  1482  				for i := 0; i < m; i++ {
  1483  					bi := b[i*ldb : i*ldb+n]
  1484  					for k := 0; k < n; k++ {
  1485  						abik := alpha * bi[k]
  1486  						if abik == 0 {
  1487  							continue
  1488  						}
  1489  						bi[k] = abik
  1490  						if noUnit {
  1491  							bi[k] *= a[k*lda+k]
  1492  						}
  1493  						c64.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k])
  1494  					}
  1495  				}
  1496  			}
  1497  		} else {
  1498  			// Form B = alpha*B*Aᵀ  or  B = alpha*B*Aᴴ.
  1499  			if uplo == blas.Upper {
  1500  				for i := 0; i < m; i++ {
  1501  					bi := b[i*ldb : i*ldb+n]
  1502  					for j, bij := range bi {
  1503  						if noConj {
  1504  							if noUnit {
  1505  								bij *= a[j*lda+j]
  1506  							}
  1507  							bij += c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1508  						} else {
  1509  							if noUnit {
  1510  								bij *= cmplx.Conj(a[j*lda+j])
  1511  							}
  1512  							bij += c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1513  						}
  1514  						bi[j] = alpha * bij
  1515  					}
  1516  				}
  1517  			} else {
  1518  				for i := 0; i < m; i++ {
  1519  					bi := b[i*ldb : i*ldb+n]
  1520  					for j := n - 1; j >= 0; j-- {
  1521  						bij := bi[j]
  1522  						if noConj {
  1523  							if noUnit {
  1524  								bij *= a[j*lda+j]
  1525  							}
  1526  							bij += c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j])
  1527  						} else {
  1528  							if noUnit {
  1529  								bij *= cmplx.Conj(a[j*lda+j])
  1530  							}
  1531  							bij += c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j])
  1532  						}
  1533  						bi[j] = alpha * bij
  1534  					}
  1535  				}
  1536  			}
  1537  		}
  1538  	}
  1539  }
  1540  
  1541  // Ctrsm solves one of the matrix equations
  1542  //
  1543  //	op(A) * X = alpha * B  if side == blas.Left,
  1544  //	X * op(A) = alpha * B  if side == blas.Right,
  1545  //
  1546  // where alpha is a scalar, X and B are m×n matrices, A is a unit or
  1547  // non-unit, upper or lower triangular matrix and op(A) is one of
  1548  //
  1549  //	op(A) = A   if transA == blas.NoTrans,
  1550  //	op(A) = Aᵀ  if transA == blas.Trans,
  1551  //	op(A) = Aᴴ  if transA == blas.ConjTrans.
  1552  //
  1553  // On return the matrix X is overwritten on B.
  1554  //
  1555  // Complex64 implementations are autogenerated and not directly tested.
  1556  func (Implementation) Ctrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) {
  1557  	na := m
  1558  	if side == blas.Right {
  1559  		na = n
  1560  	}
  1561  	switch {
  1562  	case side != blas.Left && side != blas.Right:
  1563  		panic(badSide)
  1564  	case uplo != blas.Lower && uplo != blas.Upper:
  1565  		panic(badUplo)
  1566  	case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans:
  1567  		panic(badTranspose)
  1568  	case diag != blas.Unit && diag != blas.NonUnit:
  1569  		panic(badDiag)
  1570  	case m < 0:
  1571  		panic(mLT0)
  1572  	case n < 0:
  1573  		panic(nLT0)
  1574  	case lda < max(1, na):
  1575  		panic(badLdA)
  1576  	case ldb < max(1, n):
  1577  		panic(badLdB)
  1578  	}
  1579  
  1580  	// Quick return if possible.
  1581  	if m == 0 || n == 0 {
  1582  		return
  1583  	}
  1584  
  1585  	// For zero matrix size the following slice length checks are trivially satisfied.
  1586  	if len(a) < (na-1)*lda+na {
  1587  		panic(shortA)
  1588  	}
  1589  	if len(b) < (m-1)*ldb+n {
  1590  		panic(shortB)
  1591  	}
  1592  
  1593  	if alpha == 0 {
  1594  		for i := 0; i < m; i++ {
  1595  			for j := 0; j < n; j++ {
  1596  				b[i*ldb+j] = 0
  1597  			}
  1598  		}
  1599  		return
  1600  	}
  1601  
  1602  	noConj := transA != blas.ConjTrans
  1603  	noUnit := diag == blas.NonUnit
  1604  	if side == blas.Left {
  1605  		if transA == blas.NoTrans {
  1606  			// Form  B = alpha*inv(A)*B.
  1607  			if uplo == blas.Upper {
  1608  				for i := m - 1; i >= 0; i-- {
  1609  					bi := b[i*ldb : i*ldb+n]
  1610  					if alpha != 1 {
  1611  						c64.ScalUnitary(alpha, bi)
  1612  					}
  1613  					for ka, aik := range a[i*lda+i+1 : i*lda+m] {
  1614  						k := i + 1 + ka
  1615  						if aik != 0 {
  1616  							c64.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi)
  1617  						}
  1618  					}
  1619  					if noUnit {
  1620  						c64.ScalUnitary(1/a[i*lda+i], bi)
  1621  					}
  1622  				}
  1623  			} else {
  1624  				for i := 0; i < m; i++ {
  1625  					bi := b[i*ldb : i*ldb+n]
  1626  					if alpha != 1 {
  1627  						c64.ScalUnitary(alpha, bi)
  1628  					}
  1629  					for j, aij := range a[i*lda : i*lda+i] {
  1630  						if aij != 0 {
  1631  							c64.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi)
  1632  						}
  1633  					}
  1634  					if noUnit {
  1635  						c64.ScalUnitary(1/a[i*lda+i], bi)
  1636  					}
  1637  				}
  1638  			}
  1639  		} else {
  1640  			// Form  B = alpha*inv(Aᵀ)*B  or  B = alpha*inv(Aᴴ)*B.
  1641  			if uplo == blas.Upper {
  1642  				for i := 0; i < m; i++ {
  1643  					bi := b[i*ldb : i*ldb+n]
  1644  					if noUnit {
  1645  						if noConj {
  1646  							c64.ScalUnitary(1/a[i*lda+i], bi)
  1647  						} else {
  1648  							c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi)
  1649  						}
  1650  					}
  1651  					for ja, aij := range a[i*lda+i+1 : i*lda+m] {
  1652  						if aij == 0 {
  1653  							continue
  1654  						}
  1655  						j := i + 1 + ja
  1656  						if noConj {
  1657  							c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n])
  1658  						} else {
  1659  							c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n])
  1660  						}
  1661  					}
  1662  					if alpha != 1 {
  1663  						c64.ScalUnitary(alpha, bi)
  1664  					}
  1665  				}
  1666  			} else {
  1667  				for i := m - 1; i >= 0; i-- {
  1668  					bi := b[i*ldb : i*ldb+n]
  1669  					if noUnit {
  1670  						if noConj {
  1671  							c64.ScalUnitary(1/a[i*lda+i], bi)
  1672  						} else {
  1673  							c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi)
  1674  						}
  1675  					}
  1676  					for j, aij := range a[i*lda : i*lda+i] {
  1677  						if aij == 0 {
  1678  							continue
  1679  						}
  1680  						if noConj {
  1681  							c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n])
  1682  						} else {
  1683  							c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n])
  1684  						}
  1685  					}
  1686  					if alpha != 1 {
  1687  						c64.ScalUnitary(alpha, bi)
  1688  					}
  1689  				}
  1690  			}
  1691  		}
  1692  	} else {
  1693  		if transA == blas.NoTrans {
  1694  			// Form  B = alpha*B*inv(A).
  1695  			if uplo == blas.Upper {
  1696  				for i := 0; i < m; i++ {
  1697  					bi := b[i*ldb : i*ldb+n]
  1698  					if alpha != 1 {
  1699  						c64.ScalUnitary(alpha, bi)
  1700  					}
  1701  					for j, bij := range bi {
  1702  						if bij == 0 {
  1703  							continue
  1704  						}
  1705  						if noUnit {
  1706  							bi[j] /= a[j*lda+j]
  1707  						}
  1708  						c64.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1709  					}
  1710  				}
  1711  			} else {
  1712  				for i := 0; i < m; i++ {
  1713  					bi := b[i*ldb : i*ldb+n]
  1714  					if alpha != 1 {
  1715  						c64.ScalUnitary(alpha, bi)
  1716  					}
  1717  					for j := n - 1; j >= 0; j-- {
  1718  						if bi[j] == 0 {
  1719  							continue
  1720  						}
  1721  						if noUnit {
  1722  							bi[j] /= a[j*lda+j]
  1723  						}
  1724  						c64.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j])
  1725  					}
  1726  				}
  1727  			}
  1728  		} else {
  1729  			// Form  B = alpha*B*inv(Aᵀ)  or   B = alpha*B*inv(Aᴴ).
  1730  			if uplo == blas.Upper {
  1731  				for i := 0; i < m; i++ {
  1732  					bi := b[i*ldb : i*ldb+n]
  1733  					for j := n - 1; j >= 0; j-- {
  1734  						bij := alpha * bi[j]
  1735  						if noConj {
  1736  							bij -= c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1737  							if noUnit {
  1738  								bij /= a[j*lda+j]
  1739  							}
  1740  						} else {
  1741  							bij -= c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1742  							if noUnit {
  1743  								bij /= cmplx.Conj(a[j*lda+j])
  1744  							}
  1745  						}
  1746  						bi[j] = bij
  1747  					}
  1748  				}
  1749  			} else {
  1750  				for i := 0; i < m; i++ {
  1751  					bi := b[i*ldb : i*ldb+n]
  1752  					for j, bij := range bi {
  1753  						bij *= alpha
  1754  						if noConj {
  1755  							bij -= c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j])
  1756  							if noUnit {
  1757  								bij /= a[j*lda+j]
  1758  							}
  1759  						} else {
  1760  							bij -= c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j])
  1761  							if noUnit {
  1762  								bij /= cmplx.Conj(a[j*lda+j])
  1763  							}
  1764  						}
  1765  						bi[j] = bij
  1766  					}
  1767  				}
  1768  			}
  1769  		}
  1770  	}
  1771  }