gonum.org/v1/gonum@v0.14.0/blas/gonum/level3cmplx128.go (about)

     1  // Copyright ©2019 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"math/cmplx"
     9  
    10  	"gonum.org/v1/gonum/blas"
    11  	"gonum.org/v1/gonum/internal/asm/c128"
    12  )
    13  
    14  var _ blas.Complex128Level3 = Implementation{}
    15  
    16  // Zgemm performs one of the matrix-matrix operations
    17  //
    18  //	C = alpha * op(A) * op(B) + beta * C
    19  //
    20  // where op(X) is one of
    21  //
    22  //	op(X) = X  or  op(X) = Xᵀ  or  op(X) = Xᴴ,
    23  //
    24  // alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix,
    25  // op(B) a k×n matrix and C an m×n matrix.
    26  func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
    27  	switch tA {
    28  	default:
    29  		panic(badTranspose)
    30  	case blas.NoTrans, blas.Trans, blas.ConjTrans:
    31  	}
    32  	switch tB {
    33  	default:
    34  		panic(badTranspose)
    35  	case blas.NoTrans, blas.Trans, blas.ConjTrans:
    36  	}
    37  	switch {
    38  	case m < 0:
    39  		panic(mLT0)
    40  	case n < 0:
    41  		panic(nLT0)
    42  	case k < 0:
    43  		panic(kLT0)
    44  	}
    45  	rowA, colA := m, k
    46  	if tA != blas.NoTrans {
    47  		rowA, colA = k, m
    48  	}
    49  	if lda < max(1, colA) {
    50  		panic(badLdA)
    51  	}
    52  	rowB, colB := k, n
    53  	if tB != blas.NoTrans {
    54  		rowB, colB = n, k
    55  	}
    56  	if ldb < max(1, colB) {
    57  		panic(badLdB)
    58  	}
    59  	if ldc < max(1, n) {
    60  		panic(badLdC)
    61  	}
    62  
    63  	// Quick return if possible.
    64  	if m == 0 || n == 0 {
    65  		return
    66  	}
    67  
    68  	// For zero matrix size the following slice length checks are trivially satisfied.
    69  	if len(a) < (rowA-1)*lda+colA {
    70  		panic(shortA)
    71  	}
    72  	if len(b) < (rowB-1)*ldb+colB {
    73  		panic(shortB)
    74  	}
    75  	if len(c) < (m-1)*ldc+n {
    76  		panic(shortC)
    77  	}
    78  
    79  	// Quick return if possible.
    80  	if (alpha == 0 || k == 0) && beta == 1 {
    81  		return
    82  	}
    83  
    84  	if alpha == 0 {
    85  		if beta == 0 {
    86  			for i := 0; i < m; i++ {
    87  				for j := 0; j < n; j++ {
    88  					c[i*ldc+j] = 0
    89  				}
    90  			}
    91  		} else {
    92  			for i := 0; i < m; i++ {
    93  				for j := 0; j < n; j++ {
    94  					c[i*ldc+j] *= beta
    95  				}
    96  			}
    97  		}
    98  		return
    99  	}
   100  
   101  	switch tA {
   102  	case blas.NoTrans:
   103  		switch tB {
   104  		case blas.NoTrans:
   105  			// Form  C = alpha * A * B + beta * C.
   106  			for i := 0; i < m; i++ {
   107  				switch {
   108  				case beta == 0:
   109  					for j := 0; j < n; j++ {
   110  						c[i*ldc+j] = 0
   111  					}
   112  				case beta != 1:
   113  					for j := 0; j < n; j++ {
   114  						c[i*ldc+j] *= beta
   115  					}
   116  				}
   117  				for l := 0; l < k; l++ {
   118  					tmp := alpha * a[i*lda+l]
   119  					for j := 0; j < n; j++ {
   120  						c[i*ldc+j] += tmp * b[l*ldb+j]
   121  					}
   122  				}
   123  			}
   124  		case blas.Trans:
   125  			// Form  C = alpha * A * Bᵀ + beta * C.
   126  			for i := 0; i < m; i++ {
   127  				switch {
   128  				case beta == 0:
   129  					for j := 0; j < n; j++ {
   130  						c[i*ldc+j] = 0
   131  					}
   132  				case beta != 1:
   133  					for j := 0; j < n; j++ {
   134  						c[i*ldc+j] *= beta
   135  					}
   136  				}
   137  				for l := 0; l < k; l++ {
   138  					tmp := alpha * a[i*lda+l]
   139  					for j := 0; j < n; j++ {
   140  						c[i*ldc+j] += tmp * b[j*ldb+l]
   141  					}
   142  				}
   143  			}
   144  		case blas.ConjTrans:
   145  			// Form  C = alpha * A * Bᴴ + beta * C.
   146  			for i := 0; i < m; i++ {
   147  				switch {
   148  				case beta == 0:
   149  					for j := 0; j < n; j++ {
   150  						c[i*ldc+j] = 0
   151  					}
   152  				case beta != 1:
   153  					for j := 0; j < n; j++ {
   154  						c[i*ldc+j] *= beta
   155  					}
   156  				}
   157  				for l := 0; l < k; l++ {
   158  					tmp := alpha * a[i*lda+l]
   159  					for j := 0; j < n; j++ {
   160  						c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l])
   161  					}
   162  				}
   163  			}
   164  		}
   165  	case blas.Trans:
   166  		switch tB {
   167  		case blas.NoTrans:
   168  			// Form  C = alpha * Aᵀ * B + beta * C.
   169  			for i := 0; i < m; i++ {
   170  				for j := 0; j < n; j++ {
   171  					var tmp complex128
   172  					for l := 0; l < k; l++ {
   173  						tmp += a[l*lda+i] * b[l*ldb+j]
   174  					}
   175  					if beta == 0 {
   176  						c[i*ldc+j] = alpha * tmp
   177  					} else {
   178  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   179  					}
   180  				}
   181  			}
   182  		case blas.Trans:
   183  			// Form  C = alpha * Aᵀ * Bᵀ + beta * C.
   184  			for i := 0; i < m; i++ {
   185  				for j := 0; j < n; j++ {
   186  					var tmp complex128
   187  					for l := 0; l < k; l++ {
   188  						tmp += a[l*lda+i] * b[j*ldb+l]
   189  					}
   190  					if beta == 0 {
   191  						c[i*ldc+j] = alpha * tmp
   192  					} else {
   193  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   194  					}
   195  				}
   196  			}
   197  		case blas.ConjTrans:
   198  			// Form  C = alpha * Aᵀ * Bᴴ + beta * C.
   199  			for i := 0; i < m; i++ {
   200  				for j := 0; j < n; j++ {
   201  					var tmp complex128
   202  					for l := 0; l < k; l++ {
   203  						tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l])
   204  					}
   205  					if beta == 0 {
   206  						c[i*ldc+j] = alpha * tmp
   207  					} else {
   208  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   209  					}
   210  				}
   211  			}
   212  		}
   213  	case blas.ConjTrans:
   214  		switch tB {
   215  		case blas.NoTrans:
   216  			// Form  C = alpha * Aᴴ * B + beta * C.
   217  			for i := 0; i < m; i++ {
   218  				for j := 0; j < n; j++ {
   219  					var tmp complex128
   220  					for l := 0; l < k; l++ {
   221  						tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j]
   222  					}
   223  					if beta == 0 {
   224  						c[i*ldc+j] = alpha * tmp
   225  					} else {
   226  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   227  					}
   228  				}
   229  			}
   230  		case blas.Trans:
   231  			// Form  C = alpha * Aᴴ * Bᵀ + beta * C.
   232  			for i := 0; i < m; i++ {
   233  				for j := 0; j < n; j++ {
   234  					var tmp complex128
   235  					for l := 0; l < k; l++ {
   236  						tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l]
   237  					}
   238  					if beta == 0 {
   239  						c[i*ldc+j] = alpha * tmp
   240  					} else {
   241  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   242  					}
   243  				}
   244  			}
   245  		case blas.ConjTrans:
   246  			// Form  C = alpha * Aᴴ * Bᴴ + beta * C.
   247  			for i := 0; i < m; i++ {
   248  				for j := 0; j < n; j++ {
   249  					var tmp complex128
   250  					for l := 0; l < k; l++ {
   251  						tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l])
   252  					}
   253  					if beta == 0 {
   254  						c[i*ldc+j] = alpha * tmp
   255  					} else {
   256  						c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j]
   257  					}
   258  				}
   259  			}
   260  		}
   261  	}
   262  }
   263  
   264  // Zhemm performs one of the matrix-matrix operations
   265  //
   266  //	C = alpha*A*B + beta*C  if side == blas.Left
   267  //	C = alpha*B*A + beta*C  if side == blas.Right
   268  //
   269  // where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B
   270  // and C are m×n matrices. The imaginary parts of the diagonal elements of A are
   271  // assumed to be zero.
   272  func (Implementation) Zhemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
   273  	na := m
   274  	if side == blas.Right {
   275  		na = n
   276  	}
   277  	switch {
   278  	case side != blas.Left && side != blas.Right:
   279  		panic(badSide)
   280  	case uplo != blas.Lower && uplo != blas.Upper:
   281  		panic(badUplo)
   282  	case m < 0:
   283  		panic(mLT0)
   284  	case n < 0:
   285  		panic(nLT0)
   286  	case lda < max(1, na):
   287  		panic(badLdA)
   288  	case ldb < max(1, n):
   289  		panic(badLdB)
   290  	case ldc < max(1, n):
   291  		panic(badLdC)
   292  	}
   293  
   294  	// Quick return if possible.
   295  	if m == 0 || n == 0 {
   296  		return
   297  	}
   298  
   299  	// For zero matrix size the following slice length checks are trivially satisfied.
   300  	if len(a) < lda*(na-1)+na {
   301  		panic(shortA)
   302  	}
   303  	if len(b) < ldb*(m-1)+n {
   304  		panic(shortB)
   305  	}
   306  	if len(c) < ldc*(m-1)+n {
   307  		panic(shortC)
   308  	}
   309  
   310  	// Quick return if possible.
   311  	if alpha == 0 && beta == 1 {
   312  		return
   313  	}
   314  
   315  	if alpha == 0 {
   316  		if beta == 0 {
   317  			for i := 0; i < m; i++ {
   318  				ci := c[i*ldc : i*ldc+n]
   319  				for j := range ci {
   320  					ci[j] = 0
   321  				}
   322  			}
   323  		} else {
   324  			for i := 0; i < m; i++ {
   325  				ci := c[i*ldc : i*ldc+n]
   326  				c128.ScalUnitary(beta, ci)
   327  			}
   328  		}
   329  		return
   330  	}
   331  
   332  	if side == blas.Left {
   333  		// Form  C = alpha*A*B + beta*C.
   334  		for i := 0; i < m; i++ {
   335  			atmp := alpha * complex(real(a[i*lda+i]), 0)
   336  			bi := b[i*ldb : i*ldb+n]
   337  			ci := c[i*ldc : i*ldc+n]
   338  			if beta == 0 {
   339  				for j, bij := range bi {
   340  					ci[j] = atmp * bij
   341  				}
   342  			} else {
   343  				for j, bij := range bi {
   344  					ci[j] = atmp*bij + beta*ci[j]
   345  				}
   346  			}
   347  			if uplo == blas.Upper {
   348  				for k := 0; k < i; k++ {
   349  					atmp = alpha * cmplx.Conj(a[k*lda+i])
   350  					c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   351  				}
   352  				for k := i + 1; k < m; k++ {
   353  					atmp = alpha * a[i*lda+k]
   354  					c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   355  				}
   356  			} else {
   357  				for k := 0; k < i; k++ {
   358  					atmp = alpha * a[i*lda+k]
   359  					c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   360  				}
   361  				for k := i + 1; k < m; k++ {
   362  					atmp = alpha * cmplx.Conj(a[k*lda+i])
   363  					c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   364  				}
   365  			}
   366  		}
   367  	} else {
   368  		// Form  C = alpha*B*A + beta*C.
   369  		if uplo == blas.Upper {
   370  			for i := 0; i < m; i++ {
   371  				for j := n - 1; j >= 0; j-- {
   372  					abij := alpha * b[i*ldb+j]
   373  					aj := a[j*lda+j+1 : j*lda+n]
   374  					bi := b[i*ldb+j+1 : i*ldb+n]
   375  					ci := c[i*ldc+j+1 : i*ldc+n]
   376  					var tmp complex128
   377  					for k, ajk := range aj {
   378  						ci[k] += abij * ajk
   379  						tmp += bi[k] * cmplx.Conj(ajk)
   380  					}
   381  					ajj := complex(real(a[j*lda+j]), 0)
   382  					if beta == 0 {
   383  						c[i*ldc+j] = abij*ajj + alpha*tmp
   384  					} else {
   385  						c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j]
   386  					}
   387  				}
   388  			}
   389  		} else {
   390  			for i := 0; i < m; i++ {
   391  				for j := 0; j < n; j++ {
   392  					abij := alpha * b[i*ldb+j]
   393  					aj := a[j*lda : j*lda+j]
   394  					bi := b[i*ldb : i*ldb+j]
   395  					ci := c[i*ldc : i*ldc+j]
   396  					var tmp complex128
   397  					for k, ajk := range aj {
   398  						ci[k] += abij * ajk
   399  						tmp += bi[k] * cmplx.Conj(ajk)
   400  					}
   401  					ajj := complex(real(a[j*lda+j]), 0)
   402  					if beta == 0 {
   403  						c[i*ldc+j] = abij*ajj + alpha*tmp
   404  					} else {
   405  						c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j]
   406  					}
   407  				}
   408  			}
   409  		}
   410  	}
   411  }
   412  
   413  // Zherk performs one of the hermitian rank-k operations
   414  //
   415  //	C = alpha*A*Aᴴ + beta*C  if trans == blas.NoTrans
   416  //	C = alpha*Aᴴ*A + beta*C  if trans == blas.ConjTrans
   417  //
   418  // where alpha and beta are real scalars, C is an n×n hermitian matrix and A is
   419  // an n×k matrix in the first case and a k×n matrix in the second case.
   420  //
   421  // The imaginary parts of the diagonal elements of C are assumed to be zero, and
   422  // on return they will be set to zero.
   423  func (Implementation) Zherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) {
   424  	var rowA, colA int
   425  	switch trans {
   426  	default:
   427  		panic(badTranspose)
   428  	case blas.NoTrans:
   429  		rowA, colA = n, k
   430  	case blas.ConjTrans:
   431  		rowA, colA = k, n
   432  	}
   433  	switch {
   434  	case uplo != blas.Lower && uplo != blas.Upper:
   435  		panic(badUplo)
   436  	case n < 0:
   437  		panic(nLT0)
   438  	case k < 0:
   439  		panic(kLT0)
   440  	case lda < max(1, colA):
   441  		panic(badLdA)
   442  	case ldc < max(1, n):
   443  		panic(badLdC)
   444  	}
   445  
   446  	// Quick return if possible.
   447  	if n == 0 {
   448  		return
   449  	}
   450  
   451  	// For zero matrix size the following slice length checks are trivially satisfied.
   452  	if len(a) < (rowA-1)*lda+colA {
   453  		panic(shortA)
   454  	}
   455  	if len(c) < (n-1)*ldc+n {
   456  		panic(shortC)
   457  	}
   458  
   459  	// Quick return if possible.
   460  	if (alpha == 0 || k == 0) && beta == 1 {
   461  		return
   462  	}
   463  
   464  	if alpha == 0 {
   465  		if uplo == blas.Upper {
   466  			if beta == 0 {
   467  				for i := 0; i < n; i++ {
   468  					ci := c[i*ldc+i : i*ldc+n]
   469  					for j := range ci {
   470  						ci[j] = 0
   471  					}
   472  				}
   473  			} else {
   474  				for i := 0; i < n; i++ {
   475  					ci := c[i*ldc+i : i*ldc+n]
   476  					ci[0] = complex(beta*real(ci[0]), 0)
   477  					if i != n-1 {
   478  						c128.DscalUnitary(beta, ci[1:])
   479  					}
   480  				}
   481  			}
   482  		} else {
   483  			if beta == 0 {
   484  				for i := 0; i < n; i++ {
   485  					ci := c[i*ldc : i*ldc+i+1]
   486  					for j := range ci {
   487  						ci[j] = 0
   488  					}
   489  				}
   490  			} else {
   491  				for i := 0; i < n; i++ {
   492  					ci := c[i*ldc : i*ldc+i+1]
   493  					if i != 0 {
   494  						c128.DscalUnitary(beta, ci[:i])
   495  					}
   496  					ci[i] = complex(beta*real(ci[i]), 0)
   497  				}
   498  			}
   499  		}
   500  		return
   501  	}
   502  
   503  	calpha := complex(alpha, 0)
   504  	if trans == blas.NoTrans {
   505  		// Form  C = alpha*A*Aᴴ + beta*C.
   506  		cbeta := complex(beta, 0)
   507  		if uplo == blas.Upper {
   508  			for i := 0; i < n; i++ {
   509  				ci := c[i*ldc+i : i*ldc+n]
   510  				ai := a[i*lda : i*lda+k]
   511  				switch {
   512  				case beta == 0:
   513  					// Handle the i-th diagonal element of C.
   514  					ci[0] = complex(alpha*real(c128.DotcUnitary(ai, ai)), 0)
   515  					// Handle the remaining elements on the i-th row of C.
   516  					for jc := range ci[1:] {
   517  						j := i + 1 + jc
   518  						ci[jc+1] = calpha * c128.DotcUnitary(a[j*lda:j*lda+k], ai)
   519  					}
   520  				case beta != 1:
   521  					cii := calpha*c128.DotcUnitary(ai, ai) + cbeta*ci[0]
   522  					ci[0] = complex(real(cii), 0)
   523  					for jc, cij := range ci[1:] {
   524  						j := i + 1 + jc
   525  						ci[jc+1] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij
   526  					}
   527  				default:
   528  					cii := calpha*c128.DotcUnitary(ai, ai) + ci[0]
   529  					ci[0] = complex(real(cii), 0)
   530  					for jc, cij := range ci[1:] {
   531  						j := i + 1 + jc
   532  						ci[jc+1] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cij
   533  					}
   534  				}
   535  			}
   536  		} else {
   537  			for i := 0; i < n; i++ {
   538  				ci := c[i*ldc : i*ldc+i+1]
   539  				ai := a[i*lda : i*lda+k]
   540  				switch {
   541  				case beta == 0:
   542  					// Handle the first i-1 elements on the i-th row of C.
   543  					for j := range ci[:i] {
   544  						ci[j] = calpha * c128.DotcUnitary(a[j*lda:j*lda+k], ai)
   545  					}
   546  					// Handle the i-th diagonal element of C.
   547  					ci[i] = complex(alpha*real(c128.DotcUnitary(ai, ai)), 0)
   548  				case beta != 1:
   549  					for j, cij := range ci[:i] {
   550  						ci[j] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij
   551  					}
   552  					cii := calpha*c128.DotcUnitary(ai, ai) + cbeta*ci[i]
   553  					ci[i] = complex(real(cii), 0)
   554  				default:
   555  					for j, cij := range ci[:i] {
   556  						ci[j] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cij
   557  					}
   558  					cii := calpha*c128.DotcUnitary(ai, ai) + ci[i]
   559  					ci[i] = complex(real(cii), 0)
   560  				}
   561  			}
   562  		}
   563  	} else {
   564  		// Form  C = alpha*Aᴴ*A + beta*C.
   565  		if uplo == blas.Upper {
   566  			for i := 0; i < n; i++ {
   567  				ci := c[i*ldc+i : i*ldc+n]
   568  				switch {
   569  				case beta == 0:
   570  					for jc := range ci {
   571  						ci[jc] = 0
   572  					}
   573  				case beta != 1:
   574  					c128.DscalUnitary(beta, ci)
   575  					ci[0] = complex(real(ci[0]), 0)
   576  				default:
   577  					ci[0] = complex(real(ci[0]), 0)
   578  				}
   579  				for j := 0; j < k; j++ {
   580  					aji := cmplx.Conj(a[j*lda+i])
   581  					if aji != 0 {
   582  						c128.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci)
   583  					}
   584  				}
   585  				c[i*ldc+i] = complex(real(c[i*ldc+i]), 0)
   586  			}
   587  		} else {
   588  			for i := 0; i < n; i++ {
   589  				ci := c[i*ldc : i*ldc+i+1]
   590  				switch {
   591  				case beta == 0:
   592  					for j := range ci {
   593  						ci[j] = 0
   594  					}
   595  				case beta != 1:
   596  					c128.DscalUnitary(beta, ci)
   597  					ci[i] = complex(real(ci[i]), 0)
   598  				default:
   599  					ci[i] = complex(real(ci[i]), 0)
   600  				}
   601  				for j := 0; j < k; j++ {
   602  					aji := cmplx.Conj(a[j*lda+i])
   603  					if aji != 0 {
   604  						c128.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci)
   605  					}
   606  				}
   607  				c[i*ldc+i] = complex(real(c[i*ldc+i]), 0)
   608  			}
   609  		}
   610  	}
   611  }
   612  
   613  // Zher2k performs one of the hermitian rank-2k operations
   614  //
   615  //	C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C  if trans == blas.NoTrans
   616  //	C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C  if trans == blas.ConjTrans
   617  //
   618  // where alpha and beta are scalars with beta real, C is an n×n hermitian matrix
   619  // and A and B are n×k matrices in the first case and k×n matrices in the second case.
   620  //
   621  // The imaginary parts of the diagonal elements of C are assumed to be zero, and
   622  // on return they will be set to zero.
   623  func (Implementation) Zher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) {
   624  	var row, col int
   625  	switch trans {
   626  	default:
   627  		panic(badTranspose)
   628  	case blas.NoTrans:
   629  		row, col = n, k
   630  	case blas.ConjTrans:
   631  		row, col = k, n
   632  	}
   633  	switch {
   634  	case uplo != blas.Lower && uplo != blas.Upper:
   635  		panic(badUplo)
   636  	case n < 0:
   637  		panic(nLT0)
   638  	case k < 0:
   639  		panic(kLT0)
   640  	case lda < max(1, col):
   641  		panic(badLdA)
   642  	case ldb < max(1, col):
   643  		panic(badLdB)
   644  	case ldc < max(1, n):
   645  		panic(badLdC)
   646  	}
   647  
   648  	// Quick return if possible.
   649  	if n == 0 {
   650  		return
   651  	}
   652  
   653  	// For zero matrix size the following slice length checks are trivially satisfied.
   654  	if len(a) < (row-1)*lda+col {
   655  		panic(shortA)
   656  	}
   657  	if len(b) < (row-1)*ldb+col {
   658  		panic(shortB)
   659  	}
   660  	if len(c) < (n-1)*ldc+n {
   661  		panic(shortC)
   662  	}
   663  
   664  	// Quick return if possible.
   665  	if (alpha == 0 || k == 0) && beta == 1 {
   666  		return
   667  	}
   668  
   669  	if alpha == 0 {
   670  		if uplo == blas.Upper {
   671  			if beta == 0 {
   672  				for i := 0; i < n; i++ {
   673  					ci := c[i*ldc+i : i*ldc+n]
   674  					for j := range ci {
   675  						ci[j] = 0
   676  					}
   677  				}
   678  			} else {
   679  				for i := 0; i < n; i++ {
   680  					ci := c[i*ldc+i : i*ldc+n]
   681  					ci[0] = complex(beta*real(ci[0]), 0)
   682  					if i != n-1 {
   683  						c128.DscalUnitary(beta, ci[1:])
   684  					}
   685  				}
   686  			}
   687  		} else {
   688  			if beta == 0 {
   689  				for i := 0; i < n; i++ {
   690  					ci := c[i*ldc : i*ldc+i+1]
   691  					for j := range ci {
   692  						ci[j] = 0
   693  					}
   694  				}
   695  			} else {
   696  				for i := 0; i < n; i++ {
   697  					ci := c[i*ldc : i*ldc+i+1]
   698  					if i != 0 {
   699  						c128.DscalUnitary(beta, ci[:i])
   700  					}
   701  					ci[i] = complex(beta*real(ci[i]), 0)
   702  				}
   703  			}
   704  		}
   705  		return
   706  	}
   707  
   708  	conjalpha := cmplx.Conj(alpha)
   709  	cbeta := complex(beta, 0)
   710  	if trans == blas.NoTrans {
   711  		// Form  C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C.
   712  		if uplo == blas.Upper {
   713  			for i := 0; i < n; i++ {
   714  				ci := c[i*ldc+i+1 : i*ldc+n]
   715  				ai := a[i*lda : i*lda+k]
   716  				bi := b[i*ldb : i*ldb+k]
   717  				if beta == 0 {
   718  					cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi)
   719  					c[i*ldc+i] = complex(real(cii), 0)
   720  					for jc := range ci {
   721  						j := i + 1 + jc
   722  						ci[jc] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi)
   723  					}
   724  				} else {
   725  					cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i]
   726  					c[i*ldc+i] = complex(real(cii), 0)
   727  					for jc, cij := range ci {
   728  						j := i + 1 + jc
   729  						ci[jc] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij
   730  					}
   731  				}
   732  			}
   733  		} else {
   734  			for i := 0; i < n; i++ {
   735  				ci := c[i*ldc : i*ldc+i]
   736  				ai := a[i*lda : i*lda+k]
   737  				bi := b[i*ldb : i*ldb+k]
   738  				if beta == 0 {
   739  					for j := range ci {
   740  						ci[j] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi)
   741  					}
   742  					cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi)
   743  					c[i*ldc+i] = complex(real(cii), 0)
   744  				} else {
   745  					for j, cij := range ci {
   746  						ci[j] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij
   747  					}
   748  					cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i]
   749  					c[i*ldc+i] = complex(real(cii), 0)
   750  				}
   751  			}
   752  		}
   753  	} else {
   754  		// Form  C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C.
   755  		if uplo == blas.Upper {
   756  			for i := 0; i < n; i++ {
   757  				ci := c[i*ldc+i : i*ldc+n]
   758  				switch {
   759  				case beta == 0:
   760  					for jc := range ci {
   761  						ci[jc] = 0
   762  					}
   763  				case beta != 1:
   764  					c128.DscalUnitary(beta, ci)
   765  					ci[0] = complex(real(ci[0]), 0)
   766  				default:
   767  					ci[0] = complex(real(ci[0]), 0)
   768  				}
   769  				for j := 0; j < k; j++ {
   770  					aji := a[j*lda+i]
   771  					bji := b[j*ldb+i]
   772  					if aji != 0 {
   773  						c128.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci)
   774  					}
   775  					if bji != 0 {
   776  						c128.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci)
   777  					}
   778  				}
   779  				ci[0] = complex(real(ci[0]), 0)
   780  			}
   781  		} else {
   782  			for i := 0; i < n; i++ {
   783  				ci := c[i*ldc : i*ldc+i+1]
   784  				switch {
   785  				case beta == 0:
   786  					for j := range ci {
   787  						ci[j] = 0
   788  					}
   789  				case beta != 1:
   790  					c128.DscalUnitary(beta, ci)
   791  					ci[i] = complex(real(ci[i]), 0)
   792  				default:
   793  					ci[i] = complex(real(ci[i]), 0)
   794  				}
   795  				for j := 0; j < k; j++ {
   796  					aji := a[j*lda+i]
   797  					bji := b[j*ldb+i]
   798  					if aji != 0 {
   799  						c128.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci)
   800  					}
   801  					if bji != 0 {
   802  						c128.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci)
   803  					}
   804  				}
   805  				ci[i] = complex(real(ci[i]), 0)
   806  			}
   807  		}
   808  	}
   809  }
   810  
   811  // Zsymm performs one of the matrix-matrix operations
   812  //
   813  //	C = alpha*A*B + beta*C  if side == blas.Left
   814  //	C = alpha*B*A + beta*C  if side == blas.Right
   815  //
   816  // where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B
   817  // and C are m×n matrices.
   818  func (Implementation) Zsymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
   819  	na := m
   820  	if side == blas.Right {
   821  		na = n
   822  	}
   823  	switch {
   824  	case side != blas.Left && side != blas.Right:
   825  		panic(badSide)
   826  	case uplo != blas.Lower && uplo != blas.Upper:
   827  		panic(badUplo)
   828  	case m < 0:
   829  		panic(mLT0)
   830  	case n < 0:
   831  		panic(nLT0)
   832  	case lda < max(1, na):
   833  		panic(badLdA)
   834  	case ldb < max(1, n):
   835  		panic(badLdB)
   836  	case ldc < max(1, n):
   837  		panic(badLdC)
   838  	}
   839  
   840  	// Quick return if possible.
   841  	if m == 0 || n == 0 {
   842  		return
   843  	}
   844  
   845  	// For zero matrix size the following slice length checks are trivially satisfied.
   846  	if len(a) < lda*(na-1)+na {
   847  		panic(shortA)
   848  	}
   849  	if len(b) < ldb*(m-1)+n {
   850  		panic(shortB)
   851  	}
   852  	if len(c) < ldc*(m-1)+n {
   853  		panic(shortC)
   854  	}
   855  
   856  	// Quick return if possible.
   857  	if alpha == 0 && beta == 1 {
   858  		return
   859  	}
   860  
   861  	if alpha == 0 {
   862  		if beta == 0 {
   863  			for i := 0; i < m; i++ {
   864  				ci := c[i*ldc : i*ldc+n]
   865  				for j := range ci {
   866  					ci[j] = 0
   867  				}
   868  			}
   869  		} else {
   870  			for i := 0; i < m; i++ {
   871  				ci := c[i*ldc : i*ldc+n]
   872  				c128.ScalUnitary(beta, ci)
   873  			}
   874  		}
   875  		return
   876  	}
   877  
   878  	if side == blas.Left {
   879  		// Form  C = alpha*A*B + beta*C.
   880  		for i := 0; i < m; i++ {
   881  			atmp := alpha * a[i*lda+i]
   882  			bi := b[i*ldb : i*ldb+n]
   883  			ci := c[i*ldc : i*ldc+n]
   884  			if beta == 0 {
   885  				for j, bij := range bi {
   886  					ci[j] = atmp * bij
   887  				}
   888  			} else {
   889  				for j, bij := range bi {
   890  					ci[j] = atmp*bij + beta*ci[j]
   891  				}
   892  			}
   893  			if uplo == blas.Upper {
   894  				for k := 0; k < i; k++ {
   895  					atmp = alpha * a[k*lda+i]
   896  					c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   897  				}
   898  				for k := i + 1; k < m; k++ {
   899  					atmp = alpha * a[i*lda+k]
   900  					c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   901  				}
   902  			} else {
   903  				for k := 0; k < i; k++ {
   904  					atmp = alpha * a[i*lda+k]
   905  					c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   906  				}
   907  				for k := i + 1; k < m; k++ {
   908  					atmp = alpha * a[k*lda+i]
   909  					c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci)
   910  				}
   911  			}
   912  		}
   913  	} else {
   914  		// Form  C = alpha*B*A + beta*C.
   915  		if uplo == blas.Upper {
   916  			for i := 0; i < m; i++ {
   917  				for j := n - 1; j >= 0; j-- {
   918  					abij := alpha * b[i*ldb+j]
   919  					aj := a[j*lda+j+1 : j*lda+n]
   920  					bi := b[i*ldb+j+1 : i*ldb+n]
   921  					ci := c[i*ldc+j+1 : i*ldc+n]
   922  					var tmp complex128
   923  					for k, ajk := range aj {
   924  						ci[k] += abij * ajk
   925  						tmp += bi[k] * ajk
   926  					}
   927  					if beta == 0 {
   928  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp
   929  					} else {
   930  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j]
   931  					}
   932  				}
   933  			}
   934  		} else {
   935  			for i := 0; i < m; i++ {
   936  				for j := 0; j < n; j++ {
   937  					abij := alpha * b[i*ldb+j]
   938  					aj := a[j*lda : j*lda+j]
   939  					bi := b[i*ldb : i*ldb+j]
   940  					ci := c[i*ldc : i*ldc+j]
   941  					var tmp complex128
   942  					for k, ajk := range aj {
   943  						ci[k] += abij * ajk
   944  						tmp += bi[k] * ajk
   945  					}
   946  					if beta == 0 {
   947  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp
   948  					} else {
   949  						c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j]
   950  					}
   951  				}
   952  			}
   953  		}
   954  	}
   955  }
   956  
   957  // Zsyrk performs one of the symmetric rank-k operations
   958  //
   959  //	C = alpha*A*Aᵀ + beta*C  if trans == blas.NoTrans
   960  //	C = alpha*Aᵀ*A + beta*C  if trans == blas.Trans
   961  //
   962  // where alpha and beta are scalars, C is an n×n symmetric matrix and A is
   963  // an n×k matrix in the first case and a k×n matrix in the second case.
   964  func (Implementation) Zsyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) {
   965  	var rowA, colA int
   966  	switch trans {
   967  	default:
   968  		panic(badTranspose)
   969  	case blas.NoTrans:
   970  		rowA, colA = n, k
   971  	case blas.Trans:
   972  		rowA, colA = k, n
   973  	}
   974  	switch {
   975  	case uplo != blas.Lower && uplo != blas.Upper:
   976  		panic(badUplo)
   977  	case n < 0:
   978  		panic(nLT0)
   979  	case k < 0:
   980  		panic(kLT0)
   981  	case lda < max(1, colA):
   982  		panic(badLdA)
   983  	case ldc < max(1, n):
   984  		panic(badLdC)
   985  	}
   986  
   987  	// Quick return if possible.
   988  	if n == 0 {
   989  		return
   990  	}
   991  
   992  	// For zero matrix size the following slice length checks are trivially satisfied.
   993  	if len(a) < (rowA-1)*lda+colA {
   994  		panic(shortA)
   995  	}
   996  	if len(c) < (n-1)*ldc+n {
   997  		panic(shortC)
   998  	}
   999  
  1000  	// Quick return if possible.
  1001  	if (alpha == 0 || k == 0) && beta == 1 {
  1002  		return
  1003  	}
  1004  
  1005  	if alpha == 0 {
  1006  		if uplo == blas.Upper {
  1007  			if beta == 0 {
  1008  				for i := 0; i < n; i++ {
  1009  					ci := c[i*ldc+i : i*ldc+n]
  1010  					for j := range ci {
  1011  						ci[j] = 0
  1012  					}
  1013  				}
  1014  			} else {
  1015  				for i := 0; i < n; i++ {
  1016  					ci := c[i*ldc+i : i*ldc+n]
  1017  					c128.ScalUnitary(beta, ci)
  1018  				}
  1019  			}
  1020  		} else {
  1021  			if beta == 0 {
  1022  				for i := 0; i < n; i++ {
  1023  					ci := c[i*ldc : i*ldc+i+1]
  1024  					for j := range ci {
  1025  						ci[j] = 0
  1026  					}
  1027  				}
  1028  			} else {
  1029  				for i := 0; i < n; i++ {
  1030  					ci := c[i*ldc : i*ldc+i+1]
  1031  					c128.ScalUnitary(beta, ci)
  1032  				}
  1033  			}
  1034  		}
  1035  		return
  1036  	}
  1037  
  1038  	if trans == blas.NoTrans {
  1039  		// Form  C = alpha*A*Aᵀ + beta*C.
  1040  		if uplo == blas.Upper {
  1041  			for i := 0; i < n; i++ {
  1042  				ci := c[i*ldc+i : i*ldc+n]
  1043  				ai := a[i*lda : i*lda+k]
  1044  				if beta == 0 {
  1045  					for jc := range ci {
  1046  						j := i + jc
  1047  						ci[jc] = alpha * c128.DotuUnitary(ai, a[j*lda:j*lda+k])
  1048  					}
  1049  				} else {
  1050  					for jc, cij := range ci {
  1051  						j := i + jc
  1052  						ci[jc] = beta*cij + alpha*c128.DotuUnitary(ai, a[j*lda:j*lda+k])
  1053  					}
  1054  				}
  1055  			}
  1056  		} else {
  1057  			for i := 0; i < n; i++ {
  1058  				ci := c[i*ldc : i*ldc+i+1]
  1059  				ai := a[i*lda : i*lda+k]
  1060  				if beta == 0 {
  1061  					for j := range ci {
  1062  						ci[j] = alpha * c128.DotuUnitary(ai, a[j*lda:j*lda+k])
  1063  					}
  1064  				} else {
  1065  					for j, cij := range ci {
  1066  						ci[j] = beta*cij + alpha*c128.DotuUnitary(ai, a[j*lda:j*lda+k])
  1067  					}
  1068  				}
  1069  			}
  1070  		}
  1071  	} else {
  1072  		// Form  C = alpha*Aᵀ*A + beta*C.
  1073  		if uplo == blas.Upper {
  1074  			for i := 0; i < n; i++ {
  1075  				ci := c[i*ldc+i : i*ldc+n]
  1076  				switch {
  1077  				case beta == 0:
  1078  					for jc := range ci {
  1079  						ci[jc] = 0
  1080  					}
  1081  				case beta != 1:
  1082  					for jc := range ci {
  1083  						ci[jc] *= beta
  1084  					}
  1085  				}
  1086  				for j := 0; j < k; j++ {
  1087  					aji := a[j*lda+i]
  1088  					if aji != 0 {
  1089  						c128.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci)
  1090  					}
  1091  				}
  1092  			}
  1093  		} else {
  1094  			for i := 0; i < n; i++ {
  1095  				ci := c[i*ldc : i*ldc+i+1]
  1096  				switch {
  1097  				case beta == 0:
  1098  					for j := range ci {
  1099  						ci[j] = 0
  1100  					}
  1101  				case beta != 1:
  1102  					for j := range ci {
  1103  						ci[j] *= beta
  1104  					}
  1105  				}
  1106  				for j := 0; j < k; j++ {
  1107  					aji := a[j*lda+i]
  1108  					if aji != 0 {
  1109  						c128.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci)
  1110  					}
  1111  				}
  1112  			}
  1113  		}
  1114  	}
  1115  }
  1116  
  1117  // Zsyr2k performs one of the symmetric rank-2k operations
  1118  //
  1119  //	C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C  if trans == blas.NoTrans
  1120  //	C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C  if trans == blas.Trans
  1121  //
  1122  // where alpha and beta are scalars, C is an n×n symmetric matrix and A and B
  1123  // are n×k matrices in the first case and k×n matrices in the second case.
  1124  func (Implementation) Zsyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) {
  1125  	var row, col int
  1126  	switch trans {
  1127  	default:
  1128  		panic(badTranspose)
  1129  	case blas.NoTrans:
  1130  		row, col = n, k
  1131  	case blas.Trans:
  1132  		row, col = k, n
  1133  	}
  1134  	switch {
  1135  	case uplo != blas.Lower && uplo != blas.Upper:
  1136  		panic(badUplo)
  1137  	case n < 0:
  1138  		panic(nLT0)
  1139  	case k < 0:
  1140  		panic(kLT0)
  1141  	case lda < max(1, col):
  1142  		panic(badLdA)
  1143  	case ldb < max(1, col):
  1144  		panic(badLdB)
  1145  	case ldc < max(1, n):
  1146  		panic(badLdC)
  1147  	}
  1148  
  1149  	// Quick return if possible.
  1150  	if n == 0 {
  1151  		return
  1152  	}
  1153  
  1154  	// For zero matrix size the following slice length checks are trivially satisfied.
  1155  	if len(a) < (row-1)*lda+col {
  1156  		panic(shortA)
  1157  	}
  1158  	if len(b) < (row-1)*ldb+col {
  1159  		panic(shortB)
  1160  	}
  1161  	if len(c) < (n-1)*ldc+n {
  1162  		panic(shortC)
  1163  	}
  1164  
  1165  	// Quick return if possible.
  1166  	if (alpha == 0 || k == 0) && beta == 1 {
  1167  		return
  1168  	}
  1169  
  1170  	if alpha == 0 {
  1171  		if uplo == blas.Upper {
  1172  			if beta == 0 {
  1173  				for i := 0; i < n; i++ {
  1174  					ci := c[i*ldc+i : i*ldc+n]
  1175  					for j := range ci {
  1176  						ci[j] = 0
  1177  					}
  1178  				}
  1179  			} else {
  1180  				for i := 0; i < n; i++ {
  1181  					ci := c[i*ldc+i : i*ldc+n]
  1182  					c128.ScalUnitary(beta, ci)
  1183  				}
  1184  			}
  1185  		} else {
  1186  			if beta == 0 {
  1187  				for i := 0; i < n; i++ {
  1188  					ci := c[i*ldc : i*ldc+i+1]
  1189  					for j := range ci {
  1190  						ci[j] = 0
  1191  					}
  1192  				}
  1193  			} else {
  1194  				for i := 0; i < n; i++ {
  1195  					ci := c[i*ldc : i*ldc+i+1]
  1196  					c128.ScalUnitary(beta, ci)
  1197  				}
  1198  			}
  1199  		}
  1200  		return
  1201  	}
  1202  
  1203  	if trans == blas.NoTrans {
  1204  		// Form  C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C.
  1205  		if uplo == blas.Upper {
  1206  			for i := 0; i < n; i++ {
  1207  				ci := c[i*ldc+i : i*ldc+n]
  1208  				ai := a[i*lda : i*lda+k]
  1209  				bi := b[i*ldb : i*ldb+k]
  1210  				if beta == 0 {
  1211  					for jc := range ci {
  1212  						j := i + jc
  1213  						ci[jc] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k])
  1214  					}
  1215  				} else {
  1216  					for jc, cij := range ci {
  1217  						j := i + jc
  1218  						ci[jc] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij
  1219  					}
  1220  				}
  1221  			}
  1222  		} else {
  1223  			for i := 0; i < n; i++ {
  1224  				ci := c[i*ldc : i*ldc+i+1]
  1225  				ai := a[i*lda : i*lda+k]
  1226  				bi := b[i*ldb : i*ldb+k]
  1227  				if beta == 0 {
  1228  					for j := range ci {
  1229  						ci[j] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k])
  1230  					}
  1231  				} else {
  1232  					for j, cij := range ci {
  1233  						ci[j] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij
  1234  					}
  1235  				}
  1236  			}
  1237  		}
  1238  	} else {
  1239  		// Form  C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C.
  1240  		if uplo == blas.Upper {
  1241  			for i := 0; i < n; i++ {
  1242  				ci := c[i*ldc+i : i*ldc+n]
  1243  				switch {
  1244  				case beta == 0:
  1245  					for jc := range ci {
  1246  						ci[jc] = 0
  1247  					}
  1248  				case beta != 1:
  1249  					for jc := range ci {
  1250  						ci[jc] *= beta
  1251  					}
  1252  				}
  1253  				for j := 0; j < k; j++ {
  1254  					aji := a[j*lda+i]
  1255  					bji := b[j*ldb+i]
  1256  					if aji != 0 {
  1257  						c128.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci)
  1258  					}
  1259  					if bji != 0 {
  1260  						c128.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci)
  1261  					}
  1262  				}
  1263  			}
  1264  		} else {
  1265  			for i := 0; i < n; i++ {
  1266  				ci := c[i*ldc : i*ldc+i+1]
  1267  				switch {
  1268  				case beta == 0:
  1269  					for j := range ci {
  1270  						ci[j] = 0
  1271  					}
  1272  				case beta != 1:
  1273  					for j := range ci {
  1274  						ci[j] *= beta
  1275  					}
  1276  				}
  1277  				for j := 0; j < k; j++ {
  1278  					aji := a[j*lda+i]
  1279  					bji := b[j*ldb+i]
  1280  					if aji != 0 {
  1281  						c128.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci)
  1282  					}
  1283  					if bji != 0 {
  1284  						c128.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci)
  1285  					}
  1286  				}
  1287  			}
  1288  		}
  1289  	}
  1290  }
  1291  
  1292  // Ztrmm performs one of the matrix-matrix operations
  1293  //
  1294  //	B = alpha * op(A) * B  if side == blas.Left,
  1295  //	B = alpha * B * op(A)  if side == blas.Right,
  1296  //
  1297  // where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit,
  1298  // upper or lower triangular matrix and op(A) is one of
  1299  //
  1300  //	op(A) = A   if trans == blas.NoTrans,
  1301  //	op(A) = Aᵀ  if trans == blas.Trans,
  1302  //	op(A) = Aᴴ  if trans == blas.ConjTrans.
  1303  func (Implementation) Ztrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
  1304  	na := m
  1305  	if side == blas.Right {
  1306  		na = n
  1307  	}
  1308  	switch {
  1309  	case side != blas.Left && side != blas.Right:
  1310  		panic(badSide)
  1311  	case uplo != blas.Lower && uplo != blas.Upper:
  1312  		panic(badUplo)
  1313  	case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans:
  1314  		panic(badTranspose)
  1315  	case diag != blas.Unit && diag != blas.NonUnit:
  1316  		panic(badDiag)
  1317  	case m < 0:
  1318  		panic(mLT0)
  1319  	case n < 0:
  1320  		panic(nLT0)
  1321  	case lda < max(1, na):
  1322  		panic(badLdA)
  1323  	case ldb < max(1, n):
  1324  		panic(badLdB)
  1325  	}
  1326  
  1327  	// Quick return if possible.
  1328  	if m == 0 || n == 0 {
  1329  		return
  1330  	}
  1331  
  1332  	// For zero matrix size the following slice length checks are trivially satisfied.
  1333  	if len(a) < (na-1)*lda+na {
  1334  		panic(shortA)
  1335  	}
  1336  	if len(b) < (m-1)*ldb+n {
  1337  		panic(shortB)
  1338  	}
  1339  
  1340  	// Quick return if possible.
  1341  	if alpha == 0 {
  1342  		for i := 0; i < m; i++ {
  1343  			bi := b[i*ldb : i*ldb+n]
  1344  			for j := range bi {
  1345  				bi[j] = 0
  1346  			}
  1347  		}
  1348  		return
  1349  	}
  1350  
  1351  	noConj := trans != blas.ConjTrans
  1352  	noUnit := diag == blas.NonUnit
  1353  	if side == blas.Left {
  1354  		if trans == blas.NoTrans {
  1355  			// Form B = alpha*A*B.
  1356  			if uplo == blas.Upper {
  1357  				for i := 0; i < m; i++ {
  1358  					aii := alpha
  1359  					if noUnit {
  1360  						aii *= a[i*lda+i]
  1361  					}
  1362  					bi := b[i*ldb : i*ldb+n]
  1363  					for j := range bi {
  1364  						bi[j] *= aii
  1365  					}
  1366  					for ja, aij := range a[i*lda+i+1 : i*lda+m] {
  1367  						j := ja + i + 1
  1368  						if aij != 0 {
  1369  							c128.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi)
  1370  						}
  1371  					}
  1372  				}
  1373  			} else {
  1374  				for i := m - 1; i >= 0; i-- {
  1375  					aii := alpha
  1376  					if noUnit {
  1377  						aii *= a[i*lda+i]
  1378  					}
  1379  					bi := b[i*ldb : i*ldb+n]
  1380  					for j := range bi {
  1381  						bi[j] *= aii
  1382  					}
  1383  					for j, aij := range a[i*lda : i*lda+i] {
  1384  						if aij != 0 {
  1385  							c128.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi)
  1386  						}
  1387  					}
  1388  				}
  1389  			}
  1390  		} else {
  1391  			// Form B = alpha*Aᵀ*B  or  B = alpha*Aᴴ*B.
  1392  			if uplo == blas.Upper {
  1393  				for k := m - 1; k >= 0; k-- {
  1394  					bk := b[k*ldb : k*ldb+n]
  1395  					for ja, ajk := range a[k*lda+k+1 : k*lda+m] {
  1396  						if ajk == 0 {
  1397  							continue
  1398  						}
  1399  						j := k + 1 + ja
  1400  						if noConj {
  1401  							c128.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n])
  1402  						} else {
  1403  							c128.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n])
  1404  						}
  1405  					}
  1406  					akk := alpha
  1407  					if noUnit {
  1408  						if noConj {
  1409  							akk *= a[k*lda+k]
  1410  						} else {
  1411  							akk *= cmplx.Conj(a[k*lda+k])
  1412  						}
  1413  					}
  1414  					if akk != 1 {
  1415  						c128.ScalUnitary(akk, bk)
  1416  					}
  1417  				}
  1418  			} else {
  1419  				for k := 0; k < m; k++ {
  1420  					bk := b[k*ldb : k*ldb+n]
  1421  					for j, ajk := range a[k*lda : k*lda+k] {
  1422  						if ajk == 0 {
  1423  							continue
  1424  						}
  1425  						if noConj {
  1426  							c128.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n])
  1427  						} else {
  1428  							c128.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n])
  1429  						}
  1430  					}
  1431  					akk := alpha
  1432  					if noUnit {
  1433  						if noConj {
  1434  							akk *= a[k*lda+k]
  1435  						} else {
  1436  							akk *= cmplx.Conj(a[k*lda+k])
  1437  						}
  1438  					}
  1439  					if akk != 1 {
  1440  						c128.ScalUnitary(akk, bk)
  1441  					}
  1442  				}
  1443  			}
  1444  		}
  1445  	} else {
  1446  		if trans == blas.NoTrans {
  1447  			// Form B = alpha*B*A.
  1448  			if uplo == blas.Upper {
  1449  				for i := 0; i < m; i++ {
  1450  					bi := b[i*ldb : i*ldb+n]
  1451  					for k := n - 1; k >= 0; k-- {
  1452  						abik := alpha * bi[k]
  1453  						if abik == 0 {
  1454  							continue
  1455  						}
  1456  						bi[k] = abik
  1457  						if noUnit {
  1458  							bi[k] *= a[k*lda+k]
  1459  						}
  1460  						c128.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:])
  1461  					}
  1462  				}
  1463  			} else {
  1464  				for i := 0; i < m; i++ {
  1465  					bi := b[i*ldb : i*ldb+n]
  1466  					for k := 0; k < n; k++ {
  1467  						abik := alpha * bi[k]
  1468  						if abik == 0 {
  1469  							continue
  1470  						}
  1471  						bi[k] = abik
  1472  						if noUnit {
  1473  							bi[k] *= a[k*lda+k]
  1474  						}
  1475  						c128.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k])
  1476  					}
  1477  				}
  1478  			}
  1479  		} else {
  1480  			// Form B = alpha*B*Aᵀ  or  B = alpha*B*Aᴴ.
  1481  			if uplo == blas.Upper {
  1482  				for i := 0; i < m; i++ {
  1483  					bi := b[i*ldb : i*ldb+n]
  1484  					for j, bij := range bi {
  1485  						if noConj {
  1486  							if noUnit {
  1487  								bij *= a[j*lda+j]
  1488  							}
  1489  							bij += c128.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1490  						} else {
  1491  							if noUnit {
  1492  								bij *= cmplx.Conj(a[j*lda+j])
  1493  							}
  1494  							bij += c128.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1495  						}
  1496  						bi[j] = alpha * bij
  1497  					}
  1498  				}
  1499  			} else {
  1500  				for i := 0; i < m; i++ {
  1501  					bi := b[i*ldb : i*ldb+n]
  1502  					for j := n - 1; j >= 0; j-- {
  1503  						bij := bi[j]
  1504  						if noConj {
  1505  							if noUnit {
  1506  								bij *= a[j*lda+j]
  1507  							}
  1508  							bij += c128.DotuUnitary(a[j*lda:j*lda+j], bi[:j])
  1509  						} else {
  1510  							if noUnit {
  1511  								bij *= cmplx.Conj(a[j*lda+j])
  1512  							}
  1513  							bij += c128.DotcUnitary(a[j*lda:j*lda+j], bi[:j])
  1514  						}
  1515  						bi[j] = alpha * bij
  1516  					}
  1517  				}
  1518  			}
  1519  		}
  1520  	}
  1521  }
  1522  
  1523  // Ztrsm solves one of the matrix equations
  1524  //
  1525  //	op(A) * X = alpha * B  if side == blas.Left,
  1526  //	X * op(A) = alpha * B  if side == blas.Right,
  1527  //
  1528  // where alpha is a scalar, X and B are m×n matrices, A is a unit or
  1529  // non-unit, upper or lower triangular matrix and op(A) is one of
  1530  //
  1531  //	op(A) = A   if transA == blas.NoTrans,
  1532  //	op(A) = Aᵀ  if transA == blas.Trans,
  1533  //	op(A) = Aᴴ  if transA == blas.ConjTrans.
  1534  //
  1535  // On return the matrix X is overwritten on B.
  1536  func (Implementation) Ztrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) {
  1537  	na := m
  1538  	if side == blas.Right {
  1539  		na = n
  1540  	}
  1541  	switch {
  1542  	case side != blas.Left && side != blas.Right:
  1543  		panic(badSide)
  1544  	case uplo != blas.Lower && uplo != blas.Upper:
  1545  		panic(badUplo)
  1546  	case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans:
  1547  		panic(badTranspose)
  1548  	case diag != blas.Unit && diag != blas.NonUnit:
  1549  		panic(badDiag)
  1550  	case m < 0:
  1551  		panic(mLT0)
  1552  	case n < 0:
  1553  		panic(nLT0)
  1554  	case lda < max(1, na):
  1555  		panic(badLdA)
  1556  	case ldb < max(1, n):
  1557  		panic(badLdB)
  1558  	}
  1559  
  1560  	// Quick return if possible.
  1561  	if m == 0 || n == 0 {
  1562  		return
  1563  	}
  1564  
  1565  	// For zero matrix size the following slice length checks are trivially satisfied.
  1566  	if len(a) < (na-1)*lda+na {
  1567  		panic(shortA)
  1568  	}
  1569  	if len(b) < (m-1)*ldb+n {
  1570  		panic(shortB)
  1571  	}
  1572  
  1573  	if alpha == 0 {
  1574  		for i := 0; i < m; i++ {
  1575  			for j := 0; j < n; j++ {
  1576  				b[i*ldb+j] = 0
  1577  			}
  1578  		}
  1579  		return
  1580  	}
  1581  
  1582  	noConj := transA != blas.ConjTrans
  1583  	noUnit := diag == blas.NonUnit
  1584  	if side == blas.Left {
  1585  		if transA == blas.NoTrans {
  1586  			// Form  B = alpha*inv(A)*B.
  1587  			if uplo == blas.Upper {
  1588  				for i := m - 1; i >= 0; i-- {
  1589  					bi := b[i*ldb : i*ldb+n]
  1590  					if alpha != 1 {
  1591  						c128.ScalUnitary(alpha, bi)
  1592  					}
  1593  					for ka, aik := range a[i*lda+i+1 : i*lda+m] {
  1594  						k := i + 1 + ka
  1595  						if aik != 0 {
  1596  							c128.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi)
  1597  						}
  1598  					}
  1599  					if noUnit {
  1600  						c128.ScalUnitary(1/a[i*lda+i], bi)
  1601  					}
  1602  				}
  1603  			} else {
  1604  				for i := 0; i < m; i++ {
  1605  					bi := b[i*ldb : i*ldb+n]
  1606  					if alpha != 1 {
  1607  						c128.ScalUnitary(alpha, bi)
  1608  					}
  1609  					for j, aij := range a[i*lda : i*lda+i] {
  1610  						if aij != 0 {
  1611  							c128.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi)
  1612  						}
  1613  					}
  1614  					if noUnit {
  1615  						c128.ScalUnitary(1/a[i*lda+i], bi)
  1616  					}
  1617  				}
  1618  			}
  1619  		} else {
  1620  			// Form  B = alpha*inv(Aᵀ)*B  or  B = alpha*inv(Aᴴ)*B.
  1621  			if uplo == blas.Upper {
  1622  				for i := 0; i < m; i++ {
  1623  					bi := b[i*ldb : i*ldb+n]
  1624  					if noUnit {
  1625  						if noConj {
  1626  							c128.ScalUnitary(1/a[i*lda+i], bi)
  1627  						} else {
  1628  							c128.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi)
  1629  						}
  1630  					}
  1631  					for ja, aij := range a[i*lda+i+1 : i*lda+m] {
  1632  						if aij == 0 {
  1633  							continue
  1634  						}
  1635  						j := i + 1 + ja
  1636  						if noConj {
  1637  							c128.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n])
  1638  						} else {
  1639  							c128.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n])
  1640  						}
  1641  					}
  1642  					if alpha != 1 {
  1643  						c128.ScalUnitary(alpha, bi)
  1644  					}
  1645  				}
  1646  			} else {
  1647  				for i := m - 1; i >= 0; i-- {
  1648  					bi := b[i*ldb : i*ldb+n]
  1649  					if noUnit {
  1650  						if noConj {
  1651  							c128.ScalUnitary(1/a[i*lda+i], bi)
  1652  						} else {
  1653  							c128.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi)
  1654  						}
  1655  					}
  1656  					for j, aij := range a[i*lda : i*lda+i] {
  1657  						if aij == 0 {
  1658  							continue
  1659  						}
  1660  						if noConj {
  1661  							c128.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n])
  1662  						} else {
  1663  							c128.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n])
  1664  						}
  1665  					}
  1666  					if alpha != 1 {
  1667  						c128.ScalUnitary(alpha, bi)
  1668  					}
  1669  				}
  1670  			}
  1671  		}
  1672  	} else {
  1673  		if transA == blas.NoTrans {
  1674  			// Form  B = alpha*B*inv(A).
  1675  			if uplo == blas.Upper {
  1676  				for i := 0; i < m; i++ {
  1677  					bi := b[i*ldb : i*ldb+n]
  1678  					if alpha != 1 {
  1679  						c128.ScalUnitary(alpha, bi)
  1680  					}
  1681  					for j, bij := range bi {
  1682  						if bij == 0 {
  1683  							continue
  1684  						}
  1685  						if noUnit {
  1686  							bi[j] /= a[j*lda+j]
  1687  						}
  1688  						c128.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1689  					}
  1690  				}
  1691  			} else {
  1692  				for i := 0; i < m; i++ {
  1693  					bi := b[i*ldb : i*ldb+n]
  1694  					if alpha != 1 {
  1695  						c128.ScalUnitary(alpha, bi)
  1696  					}
  1697  					for j := n - 1; j >= 0; j-- {
  1698  						if bi[j] == 0 {
  1699  							continue
  1700  						}
  1701  						if noUnit {
  1702  							bi[j] /= a[j*lda+j]
  1703  						}
  1704  						c128.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j])
  1705  					}
  1706  				}
  1707  			}
  1708  		} else {
  1709  			// Form  B = alpha*B*inv(Aᵀ)  or   B = alpha*B*inv(Aᴴ).
  1710  			if uplo == blas.Upper {
  1711  				for i := 0; i < m; i++ {
  1712  					bi := b[i*ldb : i*ldb+n]
  1713  					for j := n - 1; j >= 0; j-- {
  1714  						bij := alpha * bi[j]
  1715  						if noConj {
  1716  							bij -= c128.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1717  							if noUnit {
  1718  								bij /= a[j*lda+j]
  1719  							}
  1720  						} else {
  1721  							bij -= c128.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n])
  1722  							if noUnit {
  1723  								bij /= cmplx.Conj(a[j*lda+j])
  1724  							}
  1725  						}
  1726  						bi[j] = bij
  1727  					}
  1728  				}
  1729  			} else {
  1730  				for i := 0; i < m; i++ {
  1731  					bi := b[i*ldb : i*ldb+n]
  1732  					for j, bij := range bi {
  1733  						bij *= alpha
  1734  						if noConj {
  1735  							bij -= c128.DotuUnitary(a[j*lda:j*lda+j], bi[:j])
  1736  							if noUnit {
  1737  								bij /= a[j*lda+j]
  1738  							}
  1739  						} else {
  1740  							bij -= c128.DotcUnitary(a[j*lda:j*lda+j], bi[:j])
  1741  							if noUnit {
  1742  								bij /= cmplx.Conj(a[j*lda+j])
  1743  							}
  1744  						}
  1745  						bi[j] = bij
  1746  					}
  1747  				}
  1748  			}
  1749  		}
  1750  	}
  1751  }