gonum.org/v1/gonum@v0.14.0/blas/gonum/level3float64.go (about)

     1  // Copyright ©2014 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"gonum.org/v1/gonum/blas"
     9  	"gonum.org/v1/gonum/internal/asm/f64"
    10  )
    11  
    12  var _ blas.Float64Level3 = Implementation{}
    13  
    14  // Dtrsm solves one of the matrix equations
    15  //
    16  //	A * X = alpha * B   if tA == blas.NoTrans and side == blas.Left
    17  //	Aᵀ * X = alpha * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
    18  //	X * A = alpha * B   if tA == blas.NoTrans and side == blas.Right
    19  //	X * Aᵀ = alpha * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
    20  //
    21  // where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a
    22  // scalar.
    23  //
    24  // At entry to the function, X contains the values of B, and the result is
    25  // stored in-place into X.
    26  //
    27  // No check is made that A is invertible.
    28  func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
    29  	if s != blas.Left && s != blas.Right {
    30  		panic(badSide)
    31  	}
    32  	if ul != blas.Lower && ul != blas.Upper {
    33  		panic(badUplo)
    34  	}
    35  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
    36  		panic(badTranspose)
    37  	}
    38  	if d != blas.NonUnit && d != blas.Unit {
    39  		panic(badDiag)
    40  	}
    41  	if m < 0 {
    42  		panic(mLT0)
    43  	}
    44  	if n < 0 {
    45  		panic(nLT0)
    46  	}
    47  	k := n
    48  	if s == blas.Left {
    49  		k = m
    50  	}
    51  	if lda < max(1, k) {
    52  		panic(badLdA)
    53  	}
    54  	if ldb < max(1, n) {
    55  		panic(badLdB)
    56  	}
    57  
    58  	// Quick return if possible.
    59  	if m == 0 || n == 0 {
    60  		return
    61  	}
    62  
    63  	// For zero matrix size the following slice length checks are trivially satisfied.
    64  	if len(a) < lda*(k-1)+k {
    65  		panic(shortA)
    66  	}
    67  	if len(b) < ldb*(m-1)+n {
    68  		panic(shortB)
    69  	}
    70  
    71  	if alpha == 0 {
    72  		for i := 0; i < m; i++ {
    73  			btmp := b[i*ldb : i*ldb+n]
    74  			for j := range btmp {
    75  				btmp[j] = 0
    76  			}
    77  		}
    78  		return
    79  	}
    80  	nonUnit := d == blas.NonUnit
    81  	if s == blas.Left {
    82  		if tA == blas.NoTrans {
    83  			if ul == blas.Upper {
    84  				for i := m - 1; i >= 0; i-- {
    85  					btmp := b[i*ldb : i*ldb+n]
    86  					if alpha != 1 {
    87  						f64.ScalUnitary(alpha, btmp)
    88  					}
    89  					for ka, va := range a[i*lda+i+1 : i*lda+m] {
    90  						if va != 0 {
    91  							k := ka + i + 1
    92  							f64.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp)
    93  						}
    94  					}
    95  					if nonUnit {
    96  						tmp := 1 / a[i*lda+i]
    97  						f64.ScalUnitary(tmp, btmp)
    98  					}
    99  				}
   100  				return
   101  			}
   102  			for i := 0; i < m; i++ {
   103  				btmp := b[i*ldb : i*ldb+n]
   104  				if alpha != 1 {
   105  					f64.ScalUnitary(alpha, btmp)
   106  				}
   107  				for k, va := range a[i*lda : i*lda+i] {
   108  					if va != 0 {
   109  						f64.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp)
   110  					}
   111  				}
   112  				if nonUnit {
   113  					tmp := 1 / a[i*lda+i]
   114  					f64.ScalUnitary(tmp, btmp)
   115  				}
   116  			}
   117  			return
   118  		}
   119  		// Cases where a is transposed
   120  		if ul == blas.Upper {
   121  			for k := 0; k < m; k++ {
   122  				btmpk := b[k*ldb : k*ldb+n]
   123  				if nonUnit {
   124  					tmp := 1 / a[k*lda+k]
   125  					f64.ScalUnitary(tmp, btmpk)
   126  				}
   127  				for ia, va := range a[k*lda+k+1 : k*lda+m] {
   128  					if va != 0 {
   129  						i := ia + k + 1
   130  						f64.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n])
   131  					}
   132  				}
   133  				if alpha != 1 {
   134  					f64.ScalUnitary(alpha, btmpk)
   135  				}
   136  			}
   137  			return
   138  		}
   139  		for k := m - 1; k >= 0; k-- {
   140  			btmpk := b[k*ldb : k*ldb+n]
   141  			if nonUnit {
   142  				tmp := 1 / a[k*lda+k]
   143  				f64.ScalUnitary(tmp, btmpk)
   144  			}
   145  			for i, va := range a[k*lda : k*lda+k] {
   146  				if va != 0 {
   147  					f64.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n])
   148  				}
   149  			}
   150  			if alpha != 1 {
   151  				f64.ScalUnitary(alpha, btmpk)
   152  			}
   153  		}
   154  		return
   155  	}
   156  	// Cases where a is to the right of X.
   157  	if tA == blas.NoTrans {
   158  		if ul == blas.Upper {
   159  			for i := 0; i < m; i++ {
   160  				btmp := b[i*ldb : i*ldb+n]
   161  				if alpha != 1 {
   162  					f64.ScalUnitary(alpha, btmp)
   163  				}
   164  				for k, vb := range btmp {
   165  					if vb == 0 {
   166  						continue
   167  					}
   168  					if nonUnit {
   169  						btmp[k] /= a[k*lda+k]
   170  					}
   171  					f64.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n])
   172  				}
   173  			}
   174  			return
   175  		}
   176  		for i := 0; i < m; i++ {
   177  			btmp := b[i*ldb : i*ldb+n]
   178  			if alpha != 1 {
   179  				f64.ScalUnitary(alpha, btmp)
   180  			}
   181  			for k := n - 1; k >= 0; k-- {
   182  				if btmp[k] == 0 {
   183  					continue
   184  				}
   185  				if nonUnit {
   186  					btmp[k] /= a[k*lda+k]
   187  				}
   188  				f64.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k])
   189  			}
   190  		}
   191  		return
   192  	}
   193  	// Cases where a is transposed.
   194  	if ul == blas.Upper {
   195  		for i := 0; i < m; i++ {
   196  			btmp := b[i*ldb : i*ldb+n]
   197  			for j := n - 1; j >= 0; j-- {
   198  				tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:])
   199  				if nonUnit {
   200  					tmp /= a[j*lda+j]
   201  				}
   202  				btmp[j] = tmp
   203  			}
   204  		}
   205  		return
   206  	}
   207  	for i := 0; i < m; i++ {
   208  		btmp := b[i*ldb : i*ldb+n]
   209  		for j := 0; j < n; j++ {
   210  			tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
   211  			if nonUnit {
   212  				tmp /= a[j*lda+j]
   213  			}
   214  			btmp[j] = tmp
   215  		}
   216  	}
   217  }
   218  
   219  // Dsymm performs one of the matrix-matrix operations
   220  //
   221  //	C = alpha * A * B + beta * C  if side == blas.Left
   222  //	C = alpha * B * A + beta * C  if side == blas.Right
   223  //
   224  // where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha
   225  // is a scalar.
   226  func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
   227  	if s != blas.Right && s != blas.Left {
   228  		panic(badSide)
   229  	}
   230  	if ul != blas.Lower && ul != blas.Upper {
   231  		panic(badUplo)
   232  	}
   233  	if m < 0 {
   234  		panic(mLT0)
   235  	}
   236  	if n < 0 {
   237  		panic(nLT0)
   238  	}
   239  	k := n
   240  	if s == blas.Left {
   241  		k = m
   242  	}
   243  	if lda < max(1, k) {
   244  		panic(badLdA)
   245  	}
   246  	if ldb < max(1, n) {
   247  		panic(badLdB)
   248  	}
   249  	if ldc < max(1, n) {
   250  		panic(badLdC)
   251  	}
   252  
   253  	// Quick return if possible.
   254  	if m == 0 || n == 0 {
   255  		return
   256  	}
   257  
   258  	// For zero matrix size the following slice length checks are trivially satisfied.
   259  	if len(a) < lda*(k-1)+k {
   260  		panic(shortA)
   261  	}
   262  	if len(b) < ldb*(m-1)+n {
   263  		panic(shortB)
   264  	}
   265  	if len(c) < ldc*(m-1)+n {
   266  		panic(shortC)
   267  	}
   268  
   269  	// Quick return if possible.
   270  	if alpha == 0 && beta == 1 {
   271  		return
   272  	}
   273  
   274  	if beta == 0 {
   275  		for i := 0; i < m; i++ {
   276  			ctmp := c[i*ldc : i*ldc+n]
   277  			for j := range ctmp {
   278  				ctmp[j] = 0
   279  			}
   280  		}
   281  	}
   282  
   283  	if alpha == 0 {
   284  		if beta != 0 {
   285  			for i := 0; i < m; i++ {
   286  				ctmp := c[i*ldc : i*ldc+n]
   287  				for j := 0; j < n; j++ {
   288  					ctmp[j] *= beta
   289  				}
   290  			}
   291  		}
   292  		return
   293  	}
   294  
   295  	isUpper := ul == blas.Upper
   296  	if s == blas.Left {
   297  		for i := 0; i < m; i++ {
   298  			atmp := alpha * a[i*lda+i]
   299  			btmp := b[i*ldb : i*ldb+n]
   300  			ctmp := c[i*ldc : i*ldc+n]
   301  			for j, v := range btmp {
   302  				ctmp[j] *= beta
   303  				ctmp[j] += atmp * v
   304  			}
   305  
   306  			for k := 0; k < i; k++ {
   307  				var atmp float64
   308  				if isUpper {
   309  					atmp = a[k*lda+i]
   310  				} else {
   311  					atmp = a[i*lda+k]
   312  				}
   313  				atmp *= alpha
   314  				f64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp)
   315  			}
   316  			for k := i + 1; k < m; k++ {
   317  				var atmp float64
   318  				if isUpper {
   319  					atmp = a[i*lda+k]
   320  				} else {
   321  					atmp = a[k*lda+i]
   322  				}
   323  				atmp *= alpha
   324  				f64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp)
   325  			}
   326  		}
   327  		return
   328  	}
   329  	if isUpper {
   330  		for i := 0; i < m; i++ {
   331  			for j := n - 1; j >= 0; j-- {
   332  				tmp := alpha * b[i*ldb+j]
   333  				var tmp2 float64
   334  				atmp := a[j*lda+j+1 : j*lda+n]
   335  				btmp := b[i*ldb+j+1 : i*ldb+n]
   336  				ctmp := c[i*ldc+j+1 : i*ldc+n]
   337  				for k, v := range atmp {
   338  					ctmp[k] += tmp * v
   339  					tmp2 += btmp[k] * v
   340  				}
   341  				c[i*ldc+j] *= beta
   342  				c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
   343  			}
   344  		}
   345  		return
   346  	}
   347  	for i := 0; i < m; i++ {
   348  		for j := 0; j < n; j++ {
   349  			tmp := alpha * b[i*ldb+j]
   350  			var tmp2 float64
   351  			atmp := a[j*lda : j*lda+j]
   352  			btmp := b[i*ldb : i*ldb+j]
   353  			ctmp := c[i*ldc : i*ldc+j]
   354  			for k, v := range atmp {
   355  				ctmp[k] += tmp * v
   356  				tmp2 += btmp[k] * v
   357  			}
   358  			c[i*ldc+j] *= beta
   359  			c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2
   360  		}
   361  	}
   362  }
   363  
   364  // Dsyrk performs one of the symmetric rank-k operations
   365  //
   366  //	C = alpha * A * Aᵀ + beta * C  if tA == blas.NoTrans
   367  //	C = alpha * Aᵀ * A + beta * C  if tA == blas.Trans or tA == blas.ConjTrans
   368  //
   369  // where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and
   370  // beta are scalars.
   371  func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) {
   372  	if ul != blas.Lower && ul != blas.Upper {
   373  		panic(badUplo)
   374  	}
   375  	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
   376  		panic(badTranspose)
   377  	}
   378  	if n < 0 {
   379  		panic(nLT0)
   380  	}
   381  	if k < 0 {
   382  		panic(kLT0)
   383  	}
   384  	row, col := k, n
   385  	if tA == blas.NoTrans {
   386  		row, col = n, k
   387  	}
   388  	if lda < max(1, col) {
   389  		panic(badLdA)
   390  	}
   391  	if ldc < max(1, n) {
   392  		panic(badLdC)
   393  	}
   394  
   395  	// Quick return if possible.
   396  	if n == 0 {
   397  		return
   398  	}
   399  
   400  	// For zero matrix size the following slice length checks are trivially satisfied.
   401  	if len(a) < lda*(row-1)+col {
   402  		panic(shortA)
   403  	}
   404  	if len(c) < ldc*(n-1)+n {
   405  		panic(shortC)
   406  	}
   407  
   408  	if alpha == 0 {
   409  		if beta == 0 {
   410  			if ul == blas.Upper {
   411  				for i := 0; i < n; i++ {
   412  					ctmp := c[i*ldc+i : i*ldc+n]
   413  					for j := range ctmp {
   414  						ctmp[j] = 0
   415  					}
   416  				}
   417  				return
   418  			}
   419  			for i := 0; i < n; i++ {
   420  				ctmp := c[i*ldc : i*ldc+i+1]
   421  				for j := range ctmp {
   422  					ctmp[j] = 0
   423  				}
   424  			}
   425  			return
   426  		}
   427  		if ul == blas.Upper {
   428  			for i := 0; i < n; i++ {
   429  				ctmp := c[i*ldc+i : i*ldc+n]
   430  				for j := range ctmp {
   431  					ctmp[j] *= beta
   432  				}
   433  			}
   434  			return
   435  		}
   436  		for i := 0; i < n; i++ {
   437  			ctmp := c[i*ldc : i*ldc+i+1]
   438  			for j := range ctmp {
   439  				ctmp[j] *= beta
   440  			}
   441  		}
   442  		return
   443  	}
   444  	if tA == blas.NoTrans {
   445  		if ul == blas.Upper {
   446  			for i := 0; i < n; i++ {
   447  				ctmp := c[i*ldc+i : i*ldc+n]
   448  				atmp := a[i*lda : i*lda+k]
   449  				if beta == 0 {
   450  					for jc := range ctmp {
   451  						j := jc + i
   452  						ctmp[jc] = alpha * f64.DotUnitary(atmp, a[j*lda:j*lda+k])
   453  					}
   454  				} else {
   455  					for jc, vc := range ctmp {
   456  						j := jc + i
   457  						ctmp[jc] = vc*beta + alpha*f64.DotUnitary(atmp, a[j*lda:j*lda+k])
   458  					}
   459  				}
   460  			}
   461  			return
   462  		}
   463  		for i := 0; i < n; i++ {
   464  			ctmp := c[i*ldc : i*ldc+i+1]
   465  			atmp := a[i*lda : i*lda+k]
   466  			if beta == 0 {
   467  				for j := range ctmp {
   468  					ctmp[j] = alpha * f64.DotUnitary(a[j*lda:j*lda+k], atmp)
   469  				}
   470  			} else {
   471  				for j, vc := range ctmp {
   472  					ctmp[j] = vc*beta + alpha*f64.DotUnitary(a[j*lda:j*lda+k], atmp)
   473  				}
   474  			}
   475  		}
   476  		return
   477  	}
   478  	// Cases where a is transposed.
   479  	if ul == blas.Upper {
   480  		for i := 0; i < n; i++ {
   481  			ctmp := c[i*ldc+i : i*ldc+n]
   482  			if beta == 0 {
   483  				for j := range ctmp {
   484  					ctmp[j] = 0
   485  				}
   486  			} else if beta != 1 {
   487  				for j := range ctmp {
   488  					ctmp[j] *= beta
   489  				}
   490  			}
   491  			for l := 0; l < k; l++ {
   492  				tmp := alpha * a[l*lda+i]
   493  				if tmp != 0 {
   494  					f64.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp)
   495  				}
   496  			}
   497  		}
   498  		return
   499  	}
   500  	for i := 0; i < n; i++ {
   501  		ctmp := c[i*ldc : i*ldc+i+1]
   502  		if beta != 1 {
   503  			for j := range ctmp {
   504  				ctmp[j] *= beta
   505  			}
   506  		}
   507  		for l := 0; l < k; l++ {
   508  			tmp := alpha * a[l*lda+i]
   509  			if tmp != 0 {
   510  				f64.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp)
   511  			}
   512  		}
   513  	}
   514  }
   515  
   516  // Dsyr2k performs one of the symmetric rank 2k operations
   517  //
   518  //	C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C  if tA == blas.NoTrans
   519  //	C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C  if tA == blas.Trans or tA == blas.ConjTrans
   520  //
   521  // where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and
   522  // alpha and beta are scalars.
   523  func (Implementation) Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) {
   524  	if ul != blas.Lower && ul != blas.Upper {
   525  		panic(badUplo)
   526  	}
   527  	if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans {
   528  		panic(badTranspose)
   529  	}
   530  	if n < 0 {
   531  		panic(nLT0)
   532  	}
   533  	if k < 0 {
   534  		panic(kLT0)
   535  	}
   536  	row, col := k, n
   537  	if tA == blas.NoTrans {
   538  		row, col = n, k
   539  	}
   540  	if lda < max(1, col) {
   541  		panic(badLdA)
   542  	}
   543  	if ldb < max(1, col) {
   544  		panic(badLdB)
   545  	}
   546  	if ldc < max(1, n) {
   547  		panic(badLdC)
   548  	}
   549  
   550  	// Quick return if possible.
   551  	if n == 0 {
   552  		return
   553  	}
   554  
   555  	// For zero matrix size the following slice length checks are trivially satisfied.
   556  	if len(a) < lda*(row-1)+col {
   557  		panic(shortA)
   558  	}
   559  	if len(b) < ldb*(row-1)+col {
   560  		panic(shortB)
   561  	}
   562  	if len(c) < ldc*(n-1)+n {
   563  		panic(shortC)
   564  	}
   565  
   566  	if alpha == 0 {
   567  		if beta == 0 {
   568  			if ul == blas.Upper {
   569  				for i := 0; i < n; i++ {
   570  					ctmp := c[i*ldc+i : i*ldc+n]
   571  					for j := range ctmp {
   572  						ctmp[j] = 0
   573  					}
   574  				}
   575  				return
   576  			}
   577  			for i := 0; i < n; i++ {
   578  				ctmp := c[i*ldc : i*ldc+i+1]
   579  				for j := range ctmp {
   580  					ctmp[j] = 0
   581  				}
   582  			}
   583  			return
   584  		}
   585  		if ul == blas.Upper {
   586  			for i := 0; i < n; i++ {
   587  				ctmp := c[i*ldc+i : i*ldc+n]
   588  				for j := range ctmp {
   589  					ctmp[j] *= beta
   590  				}
   591  			}
   592  			return
   593  		}
   594  		for i := 0; i < n; i++ {
   595  			ctmp := c[i*ldc : i*ldc+i+1]
   596  			for j := range ctmp {
   597  				ctmp[j] *= beta
   598  			}
   599  		}
   600  		return
   601  	}
   602  	if tA == blas.NoTrans {
   603  		if ul == blas.Upper {
   604  			for i := 0; i < n; i++ {
   605  				atmp := a[i*lda : i*lda+k]
   606  				btmp := b[i*ldb : i*ldb+k]
   607  				ctmp := c[i*ldc+i : i*ldc+n]
   608  				if beta == 0 {
   609  					for jc := range ctmp {
   610  						j := i + jc
   611  						var tmp1, tmp2 float64
   612  						binner := b[j*ldb : j*ldb+k]
   613  						for l, v := range a[j*lda : j*lda+k] {
   614  							tmp1 += v * btmp[l]
   615  							tmp2 += atmp[l] * binner[l]
   616  						}
   617  						ctmp[jc] = alpha * (tmp1 + tmp2)
   618  					}
   619  				} else {
   620  					for jc := range ctmp {
   621  						j := i + jc
   622  						var tmp1, tmp2 float64
   623  						binner := b[j*ldb : j*ldb+k]
   624  						for l, v := range a[j*lda : j*lda+k] {
   625  							tmp1 += v * btmp[l]
   626  							tmp2 += atmp[l] * binner[l]
   627  						}
   628  						ctmp[jc] *= beta
   629  						ctmp[jc] += alpha * (tmp1 + tmp2)
   630  					}
   631  				}
   632  			}
   633  			return
   634  		}
   635  		for i := 0; i < n; i++ {
   636  			atmp := a[i*lda : i*lda+k]
   637  			btmp := b[i*ldb : i*ldb+k]
   638  			ctmp := c[i*ldc : i*ldc+i+1]
   639  			if beta == 0 {
   640  				for j := 0; j <= i; j++ {
   641  					var tmp1, tmp2 float64
   642  					binner := b[j*ldb : j*ldb+k]
   643  					for l, v := range a[j*lda : j*lda+k] {
   644  						tmp1 += v * btmp[l]
   645  						tmp2 += atmp[l] * binner[l]
   646  					}
   647  					ctmp[j] = alpha * (tmp1 + tmp2)
   648  				}
   649  			} else {
   650  				for j := 0; j <= i; j++ {
   651  					var tmp1, tmp2 float64
   652  					binner := b[j*ldb : j*ldb+k]
   653  					for l, v := range a[j*lda : j*lda+k] {
   654  						tmp1 += v * btmp[l]
   655  						tmp2 += atmp[l] * binner[l]
   656  					}
   657  					ctmp[j] *= beta
   658  					ctmp[j] += alpha * (tmp1 + tmp2)
   659  				}
   660  			}
   661  		}
   662  		return
   663  	}
   664  	if ul == blas.Upper {
   665  		for i := 0; i < n; i++ {
   666  			ctmp := c[i*ldc+i : i*ldc+n]
   667  			switch beta {
   668  			case 0:
   669  				for j := range ctmp {
   670  					ctmp[j] = 0
   671  				}
   672  			case 1:
   673  			default:
   674  				for j := range ctmp {
   675  					ctmp[j] *= beta
   676  				}
   677  			}
   678  			for l := 0; l < k; l++ {
   679  				tmp1 := alpha * b[l*ldb+i]
   680  				tmp2 := alpha * a[l*lda+i]
   681  				btmp := b[l*ldb+i : l*ldb+n]
   682  				if tmp1 != 0 || tmp2 != 0 {
   683  					for j, v := range a[l*lda+i : l*lda+n] {
   684  						ctmp[j] += v*tmp1 + btmp[j]*tmp2
   685  					}
   686  				}
   687  			}
   688  		}
   689  		return
   690  	}
   691  	for i := 0; i < n; i++ {
   692  		ctmp := c[i*ldc : i*ldc+i+1]
   693  		switch beta {
   694  		case 0:
   695  			for j := range ctmp {
   696  				ctmp[j] = 0
   697  			}
   698  		case 1:
   699  		default:
   700  			for j := range ctmp {
   701  				ctmp[j] *= beta
   702  			}
   703  		}
   704  		for l := 0; l < k; l++ {
   705  			tmp1 := alpha * b[l*ldb+i]
   706  			tmp2 := alpha * a[l*lda+i]
   707  			btmp := b[l*ldb : l*ldb+i+1]
   708  			if tmp1 != 0 || tmp2 != 0 {
   709  				for j, v := range a[l*lda : l*lda+i+1] {
   710  					ctmp[j] += v*tmp1 + btmp[j]*tmp2
   711  				}
   712  			}
   713  		}
   714  	}
   715  }
   716  
   717  // Dtrmm performs one of the matrix-matrix operations
   718  //
   719  //	B = alpha * A * B   if tA == blas.NoTrans and side == blas.Left
   720  //	B = alpha * Aᵀ * B  if tA == blas.Trans or blas.ConjTrans, and side == blas.Left
   721  //	B = alpha * B * A   if tA == blas.NoTrans and side == blas.Right
   722  //	B = alpha * B * Aᵀ  if tA == blas.Trans or blas.ConjTrans, and side == blas.Right
   723  //
   724  // where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar.
   725  func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) {
   726  	if s != blas.Left && s != blas.Right {
   727  		panic(badSide)
   728  	}
   729  	if ul != blas.Lower && ul != blas.Upper {
   730  		panic(badUplo)
   731  	}
   732  	if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans {
   733  		panic(badTranspose)
   734  	}
   735  	if d != blas.NonUnit && d != blas.Unit {
   736  		panic(badDiag)
   737  	}
   738  	if m < 0 {
   739  		panic(mLT0)
   740  	}
   741  	if n < 0 {
   742  		panic(nLT0)
   743  	}
   744  	k := n
   745  	if s == blas.Left {
   746  		k = m
   747  	}
   748  	if lda < max(1, k) {
   749  		panic(badLdA)
   750  	}
   751  	if ldb < max(1, n) {
   752  		panic(badLdB)
   753  	}
   754  
   755  	// Quick return if possible.
   756  	if m == 0 || n == 0 {
   757  		return
   758  	}
   759  
   760  	// For zero matrix size the following slice length checks are trivially satisfied.
   761  	if len(a) < lda*(k-1)+k {
   762  		panic(shortA)
   763  	}
   764  	if len(b) < ldb*(m-1)+n {
   765  		panic(shortB)
   766  	}
   767  
   768  	if alpha == 0 {
   769  		for i := 0; i < m; i++ {
   770  			btmp := b[i*ldb : i*ldb+n]
   771  			for j := range btmp {
   772  				btmp[j] = 0
   773  			}
   774  		}
   775  		return
   776  	}
   777  
   778  	nonUnit := d == blas.NonUnit
   779  	if s == blas.Left {
   780  		if tA == blas.NoTrans {
   781  			if ul == blas.Upper {
   782  				for i := 0; i < m; i++ {
   783  					tmp := alpha
   784  					if nonUnit {
   785  						tmp *= a[i*lda+i]
   786  					}
   787  					btmp := b[i*ldb : i*ldb+n]
   788  					f64.ScalUnitary(tmp, btmp)
   789  					for ka, va := range a[i*lda+i+1 : i*lda+m] {
   790  						k := ka + i + 1
   791  						if va != 0 {
   792  							f64.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp)
   793  						}
   794  					}
   795  				}
   796  				return
   797  			}
   798  			for i := m - 1; i >= 0; i-- {
   799  				tmp := alpha
   800  				if nonUnit {
   801  					tmp *= a[i*lda+i]
   802  				}
   803  				btmp := b[i*ldb : i*ldb+n]
   804  				f64.ScalUnitary(tmp, btmp)
   805  				for k, va := range a[i*lda : i*lda+i] {
   806  					if va != 0 {
   807  						f64.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp)
   808  					}
   809  				}
   810  			}
   811  			return
   812  		}
   813  		// Cases where a is transposed.
   814  		if ul == blas.Upper {
   815  			for k := m - 1; k >= 0; k-- {
   816  				btmpk := b[k*ldb : k*ldb+n]
   817  				for ia, va := range a[k*lda+k+1 : k*lda+m] {
   818  					i := ia + k + 1
   819  					btmp := b[i*ldb : i*ldb+n]
   820  					if va != 0 {
   821  						f64.AxpyUnitary(alpha*va, btmpk, btmp)
   822  					}
   823  				}
   824  				tmp := alpha
   825  				if nonUnit {
   826  					tmp *= a[k*lda+k]
   827  				}
   828  				if tmp != 1 {
   829  					f64.ScalUnitary(tmp, btmpk)
   830  				}
   831  			}
   832  			return
   833  		}
   834  		for k := 0; k < m; k++ {
   835  			btmpk := b[k*ldb : k*ldb+n]
   836  			for i, va := range a[k*lda : k*lda+k] {
   837  				btmp := b[i*ldb : i*ldb+n]
   838  				if va != 0 {
   839  					f64.AxpyUnitary(alpha*va, btmpk, btmp)
   840  				}
   841  			}
   842  			tmp := alpha
   843  			if nonUnit {
   844  				tmp *= a[k*lda+k]
   845  			}
   846  			if tmp != 1 {
   847  				f64.ScalUnitary(tmp, btmpk)
   848  			}
   849  		}
   850  		return
   851  	}
   852  	// Cases where a is on the right
   853  	if tA == blas.NoTrans {
   854  		if ul == blas.Upper {
   855  			for i := 0; i < m; i++ {
   856  				btmp := b[i*ldb : i*ldb+n]
   857  				for k := n - 1; k >= 0; k-- {
   858  					tmp := alpha * btmp[k]
   859  					if tmp == 0 {
   860  						continue
   861  					}
   862  					btmp[k] = tmp
   863  					if nonUnit {
   864  						btmp[k] *= a[k*lda+k]
   865  					}
   866  					f64.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n])
   867  				}
   868  			}
   869  			return
   870  		}
   871  		for i := 0; i < m; i++ {
   872  			btmp := b[i*ldb : i*ldb+n]
   873  			for k := 0; k < n; k++ {
   874  				tmp := alpha * btmp[k]
   875  				if tmp == 0 {
   876  					continue
   877  				}
   878  				btmp[k] = tmp
   879  				if nonUnit {
   880  					btmp[k] *= a[k*lda+k]
   881  				}
   882  				f64.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k])
   883  			}
   884  		}
   885  		return
   886  	}
   887  	// Cases where a is transposed.
   888  	if ul == blas.Upper {
   889  		for i := 0; i < m; i++ {
   890  			btmp := b[i*ldb : i*ldb+n]
   891  			for j, vb := range btmp {
   892  				tmp := vb
   893  				if nonUnit {
   894  					tmp *= a[j*lda+j]
   895  				}
   896  				tmp += f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n])
   897  				btmp[j] = alpha * tmp
   898  			}
   899  		}
   900  		return
   901  	}
   902  	for i := 0; i < m; i++ {
   903  		btmp := b[i*ldb : i*ldb+n]
   904  		for j := n - 1; j >= 0; j-- {
   905  			tmp := btmp[j]
   906  			if nonUnit {
   907  				tmp *= a[j*lda+j]
   908  			}
   909  			tmp += f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j])
   910  			btmp[j] = alpha * tmp
   911  		}
   912  	}
   913  }