github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/cgo/lapack.go

github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/cgo/lapack.go (about)

     1  // Copyright ©2015 The gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This repository is no longer maintained.
     6  // Development has moved to https://github.com/gonum/netlib.
     7  //
     8  // Package cgo provides an interface to bindings for a C LAPACK library.
     9  package cgo
    10  
    11  import (
    12  	"math"
    13  
    14  	"github.com/gonum/blas"
    15  	"github.com/gonum/lapack"
    16  	"github.com/gonum/lapack/cgo/lapacke"
    17  )
    18  
    19  // Copied from lapack/native. Keep in sync.
    20  const (
    21  	absIncNotOne    = "lapack: increment not one or negative one"
    22  	badAlpha        = "lapack: bad alpha length"
    23  	badAuxv         = "lapack: auxv has insufficient length"
    24  	badBeta         = "lapack: bad beta length"
    25  	badD            = "lapack: d has insufficient length"
    26  	badDecompUpdate = "lapack: bad decomp update"
    27  	badDiag         = "lapack: bad diag"
    28  	badDims         = "lapack: bad input dimensions"
    29  	badDirect       = "lapack: bad direct"
    30  	badE            = "lapack: e has insufficient length"
    31  	badEVComp       = "lapack: bad EVComp"
    32  	badEVJob        = "lapack: bad EVJob"
    33  	badEVSide       = "lapack: bad EVSide"
    34  	badGSVDJob      = "lapack: bad GSVDJob"
    35  	badHowMany      = "lapack: bad HowMany"
    36  	badIlo          = "lapack: ilo out of range"
    37  	badIhi          = "lapack: ihi out of range"
    38  	badIpiv         = "lapack: bad permutation length"
    39  	badJob          = "lapack: bad Job"
    40  	badK1           = "lapack: k1 out of range"
    41  	badK2           = "lapack: k2 out of range"
    42  	badKperm        = "lapack: incorrect permutation length"
    43  	badLdA          = "lapack: index of a out of range"
    44  	badNb           = "lapack: nb out of range"
    45  	badNorm         = "lapack: bad norm"
    46  	badPivot        = "lapack: bad pivot"
    47  	badS            = "lapack: s has insufficient length"
    48  	badShifts       = "lapack: bad shifts"
    49  	badSide         = "lapack: bad side"
    50  	badSlice        = "lapack: bad input slice length"
    51  	badSort         = "lapack: bad Sort"
    52  	badStore        = "lapack: bad store"
    53  	badTau          = "lapack: tau has insufficient length"
    54  	badTauQ         = "lapack: tauQ has insufficient length"
    55  	badTauP         = "lapack: tauP has insufficient length"
    56  	badTrans        = "lapack: bad trans"
    57  	badVn1          = "lapack: vn1 has insufficient length"
    58  	badVn2          = "lapack: vn2 has insufficient length"
    59  	badUplo         = "lapack: illegal triangle"
    60  	badWork         = "lapack: insufficient working memory"
    61  	badWorkStride   = "lapack: insufficient working array stride"
    62  	badZ            = "lapack: insufficient z length"
    63  	kGTM            = "lapack: k > m"
    64  	kGTN            = "lapack: k > n"
    65  	kLT0            = "lapack: k < 0"
    66  	mLT0            = "lapack: m < 0"
    67  	mLTN            = "lapack: m < n"
    68  	nanScale        = "lapack: NaN scale factor"
    69  	negDimension    = "lapack: negative matrix dimension"
    70  	negZ            = "lapack: negative z value"
    71  	nLT0            = "lapack: n < 0"
    72  	nLTM            = "lapack: n < m"
    73  	offsetGTM       = "lapack: offset > m"
    74  	shortWork       = "lapack: working array shorter than declared"
    75  	zeroDiv         = "lapack: zero divisor"
    76  )
    77  
    78  func min(m, n int) int {
    79  	if m < n {
    80  		return m
    81  	}
    82  	return n
    83  }
    84  
    85  func max(m, n int) int {
    86  	if m < n {
    87  		return n
    88  	}
    89  	return m
    90  }
    91  
    92  // checkMatrix verifies the parameters of a matrix input.
    93  // Copied from lapack/native. Keep in sync.
    94  func checkMatrix(m, n int, a []float64, lda int) {
    95  	if m < 0 {
    96  		panic("lapack: has negative number of rows")
    97  	}
    98  	if n < 0 {
    99  		panic("lapack: has negative number of columns")
   100  	}
   101  	if lda < n {
   102  		panic("lapack: stride less than number of columns")
   103  	}
   104  	if len(a) < (m-1)*lda+n {
   105  		panic("lapack: insufficient matrix slice length")
   106  	}
   107  }
   108  
   109  // checkVector verifies the parameters of a vector input.
   110  // Copied from lapack/native. Keep in sync.
   111  func checkVector(n int, v []float64, inc int) {
   112  	if n < 0 {
   113  		panic("lapack: negative vector length")
   114  	}
   115  	if (inc > 0 && (n-1)*inc >= len(v)) || (inc < 0 && (1-n)*inc >= len(v)) {
   116  		panic("lapack: insufficient vector slice length")
   117  	}
   118  }
   119  
   120  // Implementation is the cgo-based C implementation of LAPACK routines.
   121  type Implementation struct{}
   122  
   123  var _ lapack.Float64 = Implementation{}
   124  
   125  // Dgeqp3 computes a QR factorization with column pivoting of the
   126  // m×n matrix A: A*P = Q*R using Level 3 BLAS.
   127  //
   128  // The matrix Q is represented as a product of elementary reflectors
   129  //  Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n).
   130  // Each H_i has the form
   131  //  H_i = I - tau * v * v^T
   132  // where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1;
   133  // v[i:m] is stored on exit in A[i:m, i], and tau in tau[i].
   134  //
   135  // jpvt specifies a column pivot to be applied to A. If
   136  // jpvt[j] is at least zero, the jth column of A is permuted
   137  // to the front of A*P (a leading column), if jpvt[j] is -1
   138  // the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3
   139  // will panic. On return, jpvt holds the permutation that was
   140  // applied; the jth column of A*P was the jpvt[j] column of A.
   141  // jpvt must have length n or Dgeqp3 will panic.
   142  //
   143  // tau holds the scalar factors of the elementary reflectors.
   144  // It must have length min(m, n), otherwise Dgeqp3 will panic.
   145  //
   146  // work must have length at least max(1,lwork), and lwork must be at least
   147  // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must
   148  // be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return,
   149  // work[0] will contain the optimal value of lwork.
   150  //
   151  // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork
   152  // will be stored in work[0].
   153  //
   154  // Dgeqp3 is an internal routine. It is exported for testing purposes.
   155  func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) {
   156  	checkMatrix(m, n, a, lda)
   157  	if len(jpvt) != n {
   158  		panic(badIpiv)
   159  	}
   160  	if len(tau) != min(m, n) {
   161  		panic(badTau)
   162  	}
   163  	if len(work) < max(1, lwork) {
   164  		panic(badWork)
   165  	}
   166  
   167  	// Don't update jpvt if querying lwkopt.
   168  	if lwork == -1 {
   169  		lapacke.Dgeqp3(m, n, a, lda, nil, nil, work, -1)
   170  		return
   171  	}
   172  
   173  	jpvt32 := make([]int32, len(jpvt))
   174  	for i, v := range jpvt {
   175  		v++
   176  		if v != int(int32(v)) || v < 0 || n < v {
   177  			panic("lapack: jpvt element out of range")
   178  		}
   179  		jpvt32[i] = int32(v)
   180  	}
   181  
   182  	lapacke.Dgeqp3(m, n, a, lda, jpvt32, tau, work, lwork)
   183  
   184  	for i, v := range jpvt32 {
   185  		jpvt[i] = int(v - 1)
   186  	}
   187  }
   188  
   189  // Dgerqf computes an RQ factorization of the m×n matrix A,
   190  //  A = R * Q.
   191  // On exit, if m <= n, the upper triangle of the subarray
   192  // A[0:m, n-m:n] contains the m×m upper triangular matrix R.
   193  // If m >= n, the elements on and above the (m-n)-th subdiagonal
   194  // contain the m×n upper trapezoidal matrix R.
   195  // The remaining elements, with tau, represent the
   196  // orthogonal matrix Q as a product of min(m,n) elementary
   197  // reflectors.
   198  //
   199  // The matrix Q is represented as a product of elementary reflectors
   200  //  Q = H_0 H_1 . . . H_{min(m,n)-1}.
   201  // Each H(i) has the form
   202  //  H_i = I - tau_i * v * v^T
   203  // where v is a vector with v[0:n-k+i-1] stored in A[m-k+i, 0:n-k+i-1],
   204  // v[n-k+i:n] = 0 and v[n-k+i] = 1.
   205  //
   206  // tau must have length min(m,n), work must have length max(1, lwork),
   207  // and lwork must be -1 or at least max(1, m), otherwise Dgerqf will panic.
   208  // On exit, work[0] will contain the optimal length for work.
   209  //
   210  // Dgerqf is an internal routine. It is exported for testing purposes.
   211  func (impl Implementation) Dgerqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
   212  	checkMatrix(m, n, a, lda)
   213  
   214  	if len(work) < max(1, lwork) {
   215  		panic(shortWork)
   216  	}
   217  	if lwork != -1 && lwork < max(1, m) {
   218  		panic(badWork)
   219  	}
   220  
   221  	k := min(m, n)
   222  	if len(tau) != k {
   223  		panic(badTau)
   224  	}
   225  
   226  	lapacke.Dgerqf(m, n, a, lda, tau, work, lwork)
   227  }
   228  
   229  // Dlacn2 estimates the 1-norm of an n×n matrix A using sequential updates with
   230  // matrix-vector products provided externally.
   231  //
   232  // Dlacn2 is called sequentially and it returns the value of est and kase to be
   233  // used on the next call.
   234  // On the initial call, kase must be 0.
   235  // In between calls, x must be overwritten by
   236  //  A * X    if kase was returned as 1,
   237  //  A^T * X  if kase was returned as 2,
   238  // and all other parameters must not be changed.
   239  // On the final return, kase is returned as 0, v contains A*W where W is a
   240  // vector, and est = norm(V)/norm(W) is a lower bound for 1-norm of A.
   241  //
   242  // v, x, and isgn must all have length n and n must be at least 1, otherwise
   243  // Dlacn2 will panic. isave is used for temporary storage.
   244  //
   245  // Dlacn2 is an internal routine. It is exported for testing purposes.
   246  func (impl Implementation) Dlacn2(n int, v, x []float64, isgn []int, est float64, kase int, isave *[3]int) (float64, int) {
   247  	if n < 1 {
   248  		panic("lapack: non-positive n")
   249  	}
   250  	checkVector(n, x, 1)
   251  	checkVector(n, v, 1)
   252  	if len(isgn) < n {
   253  		panic("lapack: insufficient isgn length")
   254  	}
   255  	if isave[0] < 0 || isave[0] > 5 {
   256  		panic("lapack: bad isave value")
   257  	}
   258  	if isave[0] == 0 && kase != 0 {
   259  		panic("lapack: bad isave value")
   260  	}
   261  
   262  	isgn32 := make([]int32, n)
   263  	for i, v := range isgn {
   264  		isgn32[i] = int32(v)
   265  	}
   266  	pest := []float64{est}
   267  	// Save one allocation by putting isave and kase into the same slice.
   268  	isavekase := []int32{int32(isave[0]), int32(isave[1]), int32(isave[2]), int32(kase)}
   269  	lapacke.Dlacn2(n, v, x, isgn32, pest, isavekase[3:], isavekase[:3])
   270  	for i, v := range isgn32 {
   271  		isgn[i] = int(v)
   272  	}
   273  	isave[0] = int(isavekase[0])
   274  	isave[1] = int(isavekase[1])
   275  	isave[2] = int(isavekase[2])
   276  
   277  	return pest[0], int(isavekase[3])
   278  }
   279  
   280  // Dlacpy copies the elements of A specified by uplo into B. Uplo can specify
   281  // a triangular portion with blas.Upper or blas.Lower, or can specify all of the
   282  // elemest with blas.All.
   283  func (impl Implementation) Dlacpy(uplo blas.Uplo, m, n int, a []float64, lda int, b []float64, ldb int) {
   284  	checkMatrix(m, n, a, lda)
   285  	checkMatrix(m, n, b, ldb)
   286  	lapacke.Dlacpy(uplo, m, n, a, lda, b, ldb)
   287  }
   288  
   289  // Dlapmt rearranges the columns of the m×n matrix X as specified by the
   290  // permutation k_0, k_1, ..., k_n-1 of the integers 0, ..., n-1.
   291  //
   292  // If forward is true a forward permutation is performed:
   293  //
   294  //  X[0:m, k[j]] is moved to X[0:m, j] for j = 0, 1, ..., n-1.
   295  //
   296  // otherwise a backward permutation is performed:
   297  //
   298  //  X[0:m, j] is moved to X[0:m, k[j]] for j = 0, 1, ..., n-1.
   299  //
   300  // k must have length n, otherwise Dlapmt will panic. k is zero-indexed.
   301  func (impl Implementation) Dlapmt(forward bool, m, n int, x []float64, ldx int, k []int) {
   302  	checkMatrix(m, n, x, ldx)
   303  	if len(k) != n {
   304  		panic(badKperm)
   305  	}
   306  
   307  	if n <= 1 {
   308  		return
   309  	}
   310  
   311  	var forwrd int32
   312  	if forward {
   313  		forwrd = 1
   314  	}
   315  	k32 := make([]int32, len(k))
   316  	for i, v := range k {
   317  		v++ // Convert to 1-based indexing.
   318  		if v != int(int32(v)) {
   319  			panic("lapack: k element out of range")
   320  		}
   321  		k32[i] = int32(v)
   322  	}
   323  
   324  	lapacke.Dlapmt(forwrd, m, n, x, ldx, k32)
   325  }
   326  
   327  // Dlapy2 is the LAPACK version of math.Hypot.
   328  //
   329  // Dlapy2 is an internal routine. It is exported for testing purposes.
   330  func (Implementation) Dlapy2(x, y float64) float64 {
   331  	return lapacke.Dlapy2(x, y)
   332  }
   333  
   334  // Dlarfb applies a block reflector to a matrix.
   335  //
   336  // In the call to Dlarfb, the mxn c is multiplied by the implicitly defined matrix h as follows:
   337  //  c = h * c if side == Left and trans == NoTrans
   338  //  c = c * h if side == Right and trans == NoTrans
   339  //  c = h^T * c if side == Left and trans == Trans
   340  //  c = c * h^T if side == Right and trans == Trans
   341  // h is a product of elementary reflectors. direct sets the direction of multiplication
   342  //  h = h_1 * h_2 * ... * h_k if direct == Forward
   343  //  h = h_k * h_k-1 * ... * h_1 if direct == Backward
   344  // The combination of direct and store defines the orientation of the elementary
   345  // reflectors. In all cases the ones on the diagonal are implicitly represented.
   346  //
   347  // If direct == lapack.Forward and store == lapack.ColumnWise
   348  //  V = [ 1        ]
   349  //      [v1   1    ]
   350  //      [v1  v2   1]
   351  //      [v1  v2  v3]
   352  //      [v1  v2  v3]
   353  // If direct == lapack.Forward and store == lapack.RowWise
   354  //  V = [ 1  v1  v1  v1  v1]
   355  //      [     1  v2  v2  v2]
   356  //      [         1  v3  v3]
   357  // If direct == lapack.Backward and store == lapack.ColumnWise
   358  //  V = [v1  v2  v3]
   359  //      [v1  v2  v3]
   360  //      [ 1  v2  v3]
   361  //      [     1  v3]
   362  //      [         1]
   363  // If direct == lapack.Backward and store == lapack.RowWise
   364  //  V = [v1  v1   1        ]
   365  //      [v2  v2  v2   1    ]
   366  //      [v3  v3  v3  v3   1]
   367  // An elementary reflector can be explicitly constructed by extracting the
   368  // corresponding elements of v, placing a 1 where the diagonal would be, and
   369  // placing zeros in the remaining elements.
   370  //
   371  // t is a k×k matrix containing the block reflector, and this function will panic
   372  // if t is not of sufficient size. See Dlarft for more information.
   373  //
   374  // work is a temporary storage matrix with stride ldwork.
   375  // work must be of size at least n×k side == Left and m×k if side == Right, and
   376  // this function will panic if this size is not met.
   377  //
   378  // Dlarfb is an internal routine. It is exported for testing purposes.
   379  func (Implementation) Dlarfb(side blas.Side, trans blas.Transpose, direct lapack.Direct, store lapack.StoreV, m, n, k int, v []float64, ldv int, t []float64, ldt int, c []float64, ldc int, work []float64, ldwork int) {
   380  	if side != blas.Left && side != blas.Right {
   381  		panic(badSide)
   382  	}
   383  	if trans != blas.Trans && trans != blas.NoTrans {
   384  		panic(badTrans)
   385  	}
   386  	if direct != lapack.Forward && direct != lapack.Backward {
   387  		panic(badDirect)
   388  	}
   389  	if store != lapack.ColumnWise && store != lapack.RowWise {
   390  		panic(badStore)
   391  	}
   392  	checkMatrix(m, n, c, ldc)
   393  	if k < 0 {
   394  		panic(kLT0)
   395  	}
   396  	checkMatrix(k, k, t, ldt)
   397  	nv := m
   398  	nw := n
   399  	if side == blas.Right {
   400  		nv = n
   401  		nw = m
   402  	}
   403  	if store == lapack.ColumnWise {
   404  		checkMatrix(nv, k, v, ldv)
   405  	} else {
   406  		checkMatrix(k, nv, v, ldv)
   407  	}
   408  	// TODO(vladimir-ch): Replace the following two lines with
   409  	//  checkMatrix(nw, k, work, ldwork)
   410  	// if and when the issue
   411  	//  https://github.com/Reference-LAPACK/lapack/issues/37
   412  	// has been resolved.
   413  	ldwork = nw
   414  	work = make([]float64, ldwork*k)
   415  
   416  	lapacke.Dlarfb(side, trans, byte(direct), byte(store), m, n, k, v, ldv, t, ldt, c, ldc, work, ldwork)
   417  }
   418  
   419  // Dlarfg generates an elementary reflector for a Householder matrix. It creates
   420  // a real elementary reflector of order n such that
   421  //  H * (alpha) = (beta)
   422  //      (    x)   (   0)
   423  //  H^T * H = I
   424  // H is represented in the form
   425  //  H = 1 - tau * (1; v) * (1 v^T)
   426  // where tau is a real scalar.
   427  //
   428  // On entry, x contains the vector x, on exit it contains v.
   429  //
   430  // Dlarfg is an internal routine. It is exported for testing purposes.
   431  func (impl Implementation) Dlarfg(n int, alpha float64, x []float64, incX int) (beta, tau float64) {
   432  	if n < 0 {
   433  		panic(nLT0)
   434  	}
   435  	if n <= 1 {
   436  		return alpha, 0
   437  	}
   438  	checkVector(n-1, x, incX)
   439  	_alpha := []float64{alpha}
   440  	_tau := []float64{0}
   441  	lapacke.Dlarfg(n, _alpha, x, incX, _tau)
   442  	return _alpha[0], _tau[0]
   443  }
   444  
   445  // Dlarft forms the triangular factor T of a block reflector H, storing the answer
   446  // in t.
   447  //  H = I - V * T * V^T  if store == lapack.ColumnWise
   448  //  H = I - V^T * T * V  if store == lapack.RowWise
   449  // H is defined by a product of the elementary reflectors where
   450  //  H = H_0 * H_1 * ... * H_{k-1}  if direct == lapack.Forward
   451  //  H = H_{k-1} * ... * H_1 * H_0  if direct == lapack.Backward
   452  //
   453  // t is a k×k triangular matrix. t is upper triangular if direct = lapack.Forward
   454  // and lower triangular otherwise. This function will panic if t is not of
   455  // sufficient size.
   456  //
   457  // store describes the storage of the elementary reflectors in v. Please see
   458  // Dlarfb for a description of layout.
   459  //
   460  // tau contains the scalar factors of the elementary reflectors H_i.
   461  //
   462  // Dlarft is an internal routine. It is exported for testing purposes.
   463  func (Implementation) Dlarft(direct lapack.Direct, store lapack.StoreV, n, k int,
   464  	v []float64, ldv int, tau []float64, t []float64, ldt int) {
   465  	if n == 0 {
   466  		return
   467  	}
   468  	if n < 0 || k < 0 {
   469  		panic(negDimension)
   470  	}
   471  	if direct != lapack.Forward && direct != lapack.Backward {
   472  		panic(badDirect)
   473  	}
   474  	if store != lapack.RowWise && store != lapack.ColumnWise {
   475  		panic(badStore)
   476  	}
   477  	if len(tau) < k {
   478  		panic(badTau)
   479  	}
   480  	checkMatrix(k, k, t, ldt)
   481  
   482  	lapacke.Dlarft(byte(direct), byte(store), n, k, v, ldv, tau, t, ldt)
   483  }
   484  
   485  // Dlange computes the matrix norm of the general m×n matrix a. The input norm
   486  // specifies the norm computed.
   487  //  lapack.MaxAbs: the maximum absolute value of an element.
   488  //  lapack.MaxColumnSum: the maximum column sum of the absolute values of the entries.
   489  //  lapack.MaxRowSum: the maximum row sum of the absolute values of the entries.
   490  //  lapack.NormFrob: the square root of the sum of the squares of the entries.
   491  // If norm == lapack.MaxColumnSum, work must be of length n, and this function will panic otherwise.
   492  // There are no restrictions on work for the other matrix norms.
   493  func (impl Implementation) Dlange(norm lapack.MatrixNorm, m, n int, a []float64, lda int, work []float64) float64 {
   494  	checkMatrix(m, n, a, lda)
   495  	switch norm {
   496  	case lapack.MaxRowSum, lapack.MaxColumnSum, lapack.NormFrob, lapack.MaxAbs:
   497  	default:
   498  		panic(badNorm)
   499  	}
   500  	if norm == lapack.MaxColumnSum && len(work) < n {
   501  		panic(badWork)
   502  	}
   503  	return lapacke.Dlange(byte(norm), m, n, a, lda, work)
   504  }
   505  
   506  // Dlansy computes the specified norm of an n×n symmetric matrix. If
   507  // norm == lapack.MaxColumnSum or norm == lapackMaxRowSum work must have length
   508  // at least n, otherwise work is unused.
   509  func (impl Implementation) Dlansy(norm lapack.MatrixNorm, uplo blas.Uplo, n int, a []float64, lda int, work []float64) float64 {
   510  	checkMatrix(n, n, a, lda)
   511  	switch norm {
   512  	case lapack.MaxRowSum, lapack.MaxColumnSum, lapack.NormFrob, lapack.MaxAbs:
   513  	default:
   514  		panic(badNorm)
   515  	}
   516  	if (norm == lapack.MaxColumnSum || norm == lapack.MaxRowSum) && len(work) < n {
   517  		panic(badWork)
   518  	}
   519  	if uplo != blas.Upper && uplo != blas.Lower {
   520  		panic(badUplo)
   521  	}
   522  	return lapacke.Dlansy(byte(norm), uplo, n, a, lda, work)
   523  }
   524  
   525  // Dlantr computes the specified norm of an m×n trapezoidal matrix A. If
   526  // norm == lapack.MaxColumnSum work must have length at least n, otherwise work
   527  // is unused.
   528  func (impl Implementation) Dlantr(norm lapack.MatrixNorm, uplo blas.Uplo, diag blas.Diag, m, n int, a []float64, lda int, work []float64) float64 {
   529  	checkMatrix(m, n, a, lda)
   530  	switch norm {
   531  	case lapack.MaxRowSum, lapack.MaxColumnSum, lapack.NormFrob, lapack.MaxAbs:
   532  	default:
   533  		panic(badNorm)
   534  	}
   535  	if uplo != blas.Upper && uplo != blas.Lower {
   536  		panic(badUplo)
   537  	}
   538  	if diag != blas.Unit && diag != blas.NonUnit {
   539  		panic(badDiag)
   540  	}
   541  	if norm == lapack.MaxColumnSum && len(work) < n {
   542  		panic(badWork)
   543  	}
   544  	return lapacke.Dlantr(byte(norm), uplo, diag, m, n, a, lda, work)
   545  }
   546  
   547  // Dlarfx applies an elementary reflector H to a real m×n matrix C, from either
   548  // the left or the right, with loop unrolling when the reflector has order less
   549  // than 11.
   550  //
   551  // H is represented in the form
   552  //  H = I - tau * v * v^T,
   553  // where tau is a real scalar and v is a real vector. If tau = 0, then H is
   554  // taken to be the identity matrix.
   555  //
   556  // v must have length equal to m if side == blas.Left, and equal to n if side ==
   557  // blas.Right, otherwise Dlarfx will panic.
   558  //
   559  // c and ldc represent the m×n matrix C. On return, C is overwritten by the
   560  // matrix H * C if side == blas.Left, or C * H if side == blas.Right.
   561  //
   562  // work must have length at least n if side == blas.Left, and at least m if side
   563  // == blas.Right, otherwise Dlarfx will panic. work is not referenced if H has
   564  // order < 11.
   565  func (impl Implementation) Dlarfx(side blas.Side, m, n int, v []float64, tau float64, c []float64, ldc int, work []float64) {
   566  	checkMatrix(m, n, c, ldc)
   567  	switch side {
   568  	case blas.Left:
   569  		checkVector(m, v, 1)
   570  		if len(work) < n && m > 10 {
   571  			panic(badWork)
   572  		}
   573  	case blas.Right:
   574  		checkVector(n, v, 1)
   575  		if len(work) < m && n > 10 {
   576  			panic(badWork)
   577  		}
   578  	default:
   579  		panic(badSide)
   580  	}
   581  
   582  	lapacke.Dlarfx(side, m, n, v, tau, c, ldc, work)
   583  }
   584  
   585  // Dlascl multiplies an m×n matrix by the scalar cto/cfrom.
   586  //
   587  // cfrom must not be zero, and cto and cfrom must not be NaN, otherwise Dlascl
   588  // will panic.
   589  //
   590  // Dlascl is an internal routine. It is exported for testing purposes.
   591  func (impl Implementation) Dlascl(kind lapack.MatrixType, kl, ku int, cfrom, cto float64, m, n int, a []float64, lda int) {
   592  	checkMatrix(m, n, a, lda)
   593  	if cfrom == 0 {
   594  		panic(zeroDiv)
   595  	}
   596  	if math.IsNaN(cfrom) || math.IsNaN(cto) {
   597  		panic(nanScale)
   598  	}
   599  	lapacke.Dlascl(byte(kind), kl, ku, cfrom, cto, m, n, a, lda)
   600  }
   601  
   602  // Dlaset sets the off-diagonal elements of A to alpha, and the diagonal
   603  // elements to beta. If uplo == blas.Upper, only the elements in the upper
   604  // triangular part are set. If uplo == blas.Lower, only the elements in the
   605  // lower triangular part are set. If uplo is otherwise, all of the elements of A
   606  // are set.
   607  //
   608  // Dlaset is an internal routine. It is exported for testing purposes.
   609  func (impl Implementation) Dlaset(uplo blas.Uplo, m, n int, alpha, beta float64, a []float64, lda int) {
   610  	checkMatrix(m, n, a, lda)
   611  	lapacke.Dlaset(uplo, m, n, alpha, beta, a, lda)
   612  }
   613  
   614  // Dlasrt sorts the numbers in the input slice d. If s == lapack.SortIncreasing,
   615  // the elements are sorted in increasing order. If s == lapack.SortDecreasing,
   616  // the elements are sorted in decreasing order. For other values of s Dlasrt
   617  // will panic.
   618  //
   619  // Dlasrt is an internal routine. It is exported for testing purposes.
   620  func (impl Implementation) Dlasrt(s lapack.Sort, n int, d []float64) {
   621  	checkVector(n, d, 1)
   622  	switch s {
   623  	default:
   624  		panic(badSort)
   625  	case lapack.SortIncreasing, lapack.SortDecreasing:
   626  	}
   627  	lapacke.Dlasrt(byte(s), n, d[:n])
   628  }
   629  
   630  // Dlaswp swaps the rows k1 to k2 of a rectangular matrix A according to the
   631  // indices in ipiv so that row k is swapped with ipiv[k].
   632  //
   633  // n is the number of columns of A and incX is the increment for ipiv. If incX
   634  // is 1, the swaps are applied from k1 to k2. If incX is -1, the swaps are
   635  // applied in reverse order from k2 to k1. For other values of incX Dlaswp will
   636  // panic. ipiv must have length k2+1, otherwise Dlaswp will panic.
   637  //
   638  // The indices k1, k2, and the elements of ipiv are zero-based.
   639  //
   640  // Dlaswp is an internal routine. It is exported for testing purposes.
   641  func (impl Implementation) Dlaswp(n int, a []float64, lda, k1, k2 int, ipiv []int, incX int) {
   642  	switch {
   643  	case n < 0:
   644  		panic(nLT0)
   645  	case k2 < 0:
   646  		panic(badK2)
   647  	case k1 < 0 || k2 < k1:
   648  		panic(badK1)
   649  	case len(ipiv) != k2+1:
   650  		panic(badIpiv)
   651  	case incX != 1 && incX != -1:
   652  		panic(absIncNotOne)
   653  	}
   654  
   655  	ipiv32 := make([]int32, len(ipiv))
   656  	for i, v := range ipiv {
   657  		ipiv32[i] = int32(v + 1)
   658  	}
   659  	lapacke.Dlaswp(n, a, lda, k1+1, k2+1, ipiv32, incX)
   660  }
   661  
   662  // Dpotrf computes the Cholesky decomposition of the symmetric positive definite
   663  // matrix a. If ul == blas.Upper, then a is stored as an upper-triangular matrix,
   664  // and a = U U^T is stored in place into a. If ul == blas.Lower, then a = L L^T
   665  // is computed and stored in-place into a. If a is not positive definite, false
   666  // is returned. This is the blocked version of the algorithm.
   667  func (impl Implementation) Dpotrf(ul blas.Uplo, n int, a []float64, lda int) (ok bool) {
   668  	// ul is checked in lapacke.Dpotrf.
   669  	if n < 0 {
   670  		panic(nLT0)
   671  	}
   672  	if lda < n {
   673  		panic(badLdA)
   674  	}
   675  	if n == 0 {
   676  		return true
   677  	}
   678  	return lapacke.Dpotrf(ul, n, a, lda)
   679  }
   680  
   681  // Dgebal balances an n×n matrix A. Balancing consists of two stages, permuting
   682  // and scaling. Both steps are optional and depend on the value of job.
   683  //
   684  // Permuting consists of applying a permutation matrix P such that the matrix
   685  // that results from P^T*A*P takes the upper block triangular form
   686  //            [ T1  X  Y  ]
   687  //  P^T A P = [  0  B  Z  ],
   688  //            [  0  0  T2 ]
   689  // where T1 and T2 are upper triangular matrices and B contains at least one
   690  // nonzero off-diagonal element in each row and column. The indices ilo and ihi
   691  // mark the starting and ending columns of the submatrix B. The eigenvalues of A
   692  // isolated in the first 0 to ilo-1 and last ihi+1 to n-1 elements on the
   693  // diagonal can be read off without any roundoff error.
   694  //
   695  // Scaling consists of applying a diagonal similarity transformation D such that
   696  // D^{-1}*B*D has the 1-norm of each row and its corresponding column nearly
   697  // equal. The output matrix is
   698  //  [ T1     X*D          Y    ]
   699  //  [  0  inv(D)*B*D  inv(D)*Z ].
   700  //  [  0      0           T2   ]
   701  // Scaling may reduce the 1-norm of the matrix, and improve the accuracy of
   702  // the computed eigenvalues and/or eigenvectors.
   703  //
   704  // job specifies the operations that will be performed on A.
   705  // If job is lapack.None, Dgebal sets scale[i] = 1 for all i and returns ilo=0, ihi=n-1.
   706  // If job is lapack.Permute, only permuting will be done.
   707  // If job is lapack.Scale, only scaling will be done.
   708  // If job is lapack.PermuteScale, both permuting and scaling will be done.
   709  //
   710  // On return, if job is lapack.Permute or lapack.PermuteScale, it will hold that
   711  //  A[i,j] == 0,   for i > j and j ∈ {0, ..., ilo-1, ihi+1, ..., n-1}.
   712  // If job is lapack.None or lapack.Scale, or if n == 0, it will hold that
   713  //  ilo == 0 and ihi == n-1.
   714  //
   715  // On return, scale will contain information about the permutations and scaling
   716  // factors applied to A. If π(j) denotes the index of the column interchanged
   717  // with column j, and D[j,j] denotes the scaling factor applied to column j,
   718  // then
   719  //  scale[j] == π(j),     for j ∈ {0, ..., ilo-1, ihi+1, ..., n-1},
   720  //           == D[j,j],   for j ∈ {ilo, ..., ihi}.
   721  // scale must have length equal to n, otherwise Dgebal will panic.
   722  //
   723  // Dgebal is an internal routine. It is exported for testing purposes.
   724  func (impl Implementation) Dgebal(job lapack.Job, n int, a []float64, lda int, scale []float64) (ilo, ihi int) {
   725  	switch job {
   726  	default:
   727  		panic(badJob)
   728  	case lapack.None, lapack.Permute, lapack.Scale, lapack.PermuteScale:
   729  	}
   730  	checkMatrix(n, n, a, lda)
   731  	if len(scale) != n {
   732  		panic("lapack: bad length of scale")
   733  	}
   734  
   735  	ilo32 := make([]int32, 1)
   736  	ihi32 := make([]int32, 1)
   737  	lapacke.Dgebal(job, n, a, lda, ilo32, ihi32, scale)
   738  	ilo = int(ilo32[0]) - 1
   739  	ihi = int(ihi32[0]) - 1
   740  	for j := 0; j < ilo; j++ {
   741  		scale[j]--
   742  	}
   743  	for j := ihi + 1; j < n; j++ {
   744  		scale[j]--
   745  	}
   746  	return ilo, ihi
   747  }
   748  
   749  // Dgebak transforms an n×m matrix V as
   750  //  V = P D V,        if side == blas.Right,
   751  //  V = P D^{-1} V,   if side == blas.Left,
   752  // where P and D are n×n permutation and scaling matrices, respectively,
   753  // implicitly represented by job, scale, ilo and ihi as returned by Dgebal.
   754  //
   755  // Typically, columns of the matrix V contain the right or left (determined by
   756  // side) eigenvectors of the balanced matrix output by Dgebal, and Dgebak forms
   757  // the eigenvectors of the original matrix.
   758  //
   759  // Dgebak is an internal routine. It is exported for testing purposes.
   760  func (impl Implementation) Dgebak(job lapack.Job, side lapack.EVSide, n, ilo, ihi int, scale []float64, m int, v []float64, ldv int) {
   761  	switch job {
   762  	default:
   763  		panic(badJob)
   764  	case lapack.None, lapack.Permute, lapack.Scale, lapack.PermuteScale:
   765  	}
   766  	var bside blas.Side
   767  	switch side {
   768  	default:
   769  		panic(badSide)
   770  	case lapack.LeftEV:
   771  		bside = blas.Left
   772  	case lapack.RightEV:
   773  		bside = blas.Right
   774  	}
   775  	checkMatrix(n, m, v, ldv)
   776  	switch {
   777  	case ilo < 0 || max(0, n-1) < ilo:
   778  		panic(badIlo)
   779  	case ihi < min(ilo, n-1) || n <= ihi:
   780  		panic(badIhi)
   781  	}
   782  
   783  	// Convert permutation indices to 1-based.
   784  	for j := 0; j < ilo; j++ {
   785  		scale[j]++
   786  	}
   787  	for j := ihi + 1; j < n; j++ {
   788  		scale[j]++
   789  	}
   790  	lapacke.Dgebak(job, bside, n, ilo+1, ihi+1, scale, m, v, ldv)
   791  	// Convert permutation indices back to 0-based.
   792  	for j := 0; j < ilo; j++ {
   793  		scale[j]--
   794  	}
   795  	for j := ihi + 1; j < n; j++ {
   796  		scale[j]--
   797  	}
   798  }
   799  
   800  // Dbdsqr performs a singular value decomposition of a real n×n bidiagonal matrix.
   801  //
   802  // The SVD of the bidiagonal matrix B is
   803  //  B = Q * S * P^T
   804  // where S is a diagonal matrix of singular values, Q is an orthogonal matrix of
   805  // left singular vectors, and P is an orthogonal matrix of right singular vectors.
   806  //
   807  // Q and P are only computed if requested. If left singular vectors are requested,
   808  // this routine returns U * Q instead of Q, and if right singular vectors are
   809  // requested P^T * VT is returned instead of P^T.
   810  //
   811  // Frequently Dbdsqr is used in conjunction with Dgebrd which reduces a general
   812  // matrix A into bidiagonal form. In this case, the SVD of A is
   813  //  A = (U * Q) * S * (P^T * VT)
   814  //
   815  // This routine may also compute Q^T * C.
   816  //
   817  // d and e contain the elements of the bidiagonal matrix b. d must have length at
   818  // least n, and e must have length at least n-1. Dbdsqr will panic if there is
   819  // insufficient length. On exit, D contains the singular values of B in decreasing
   820  // order.
   821  //
   822  // VT is a matrix of size n×ncvt whose elements are stored in vt. The elements
   823  // of vt are modified to contain P^T * VT on exit. VT is not used if ncvt == 0.
   824  //
   825  // U is a matrix of size nru×n whose elements are stored in u. The elements
   826  // of u are modified to contain U * Q on exit. U is not used if nru == 0.
   827  //
   828  // C is a matrix of size n×ncc whose elements are stored in c. The elements
   829  // of c are modified to contain Q^T * C on exit. C is not used if ncc == 0.
   830  //
   831  // work contains temporary storage and must have length at least 4*n. Dbdsqr
   832  // will panic if there is insufficient working memory.
   833  //
   834  // Dbdsqr returns whether the decomposition was successful.
   835  func (impl Implementation) Dbdsqr(uplo blas.Uplo, n, ncvt, nru, ncc int, d, e, vt []float64, ldvt int, u []float64, ldu int, c []float64, ldc int, work []float64) (ok bool) {
   836  	if uplo != blas.Upper && uplo != blas.Lower {
   837  		panic(badUplo)
   838  	}
   839  	if ncvt != 0 {
   840  		checkMatrix(n, ncvt, vt, ldvt)
   841  	}
   842  	if nru != 0 {
   843  		checkMatrix(nru, n, u, ldu)
   844  	}
   845  	if ncc != 0 {
   846  		checkMatrix(n, ncc, c, ldc)
   847  	}
   848  	if len(d) < n {
   849  		panic(badD)
   850  	}
   851  	if len(e) < n-1 {
   852  		panic(badE)
   853  	}
   854  	if len(work) < 4*n {
   855  		panic(badWork)
   856  	}
   857  	// An address must be passed to cgo. If lengths are zero, allocate a slice.
   858  	if len(vt) == 0 {
   859  		vt = make([]float64, 1)
   860  	}
   861  	if len(u) == 0 {
   862  		vt = make([]float64, 1)
   863  	}
   864  	if len(c) == 0 {
   865  		c = make([]float64, 1)
   866  	}
   867  	return lapacke.Dbdsqr(uplo, n, ncvt, nru, ncc, d, e, vt, ldvt, u, ldu, c, ldc, work)
   868  }
   869  
   870  // Dgebrd reduces a general m×n matrix A to upper or lower bidiagonal form B by
   871  // an orthogonal transformation:
   872  //  Q^T * A * P = B.
   873  // The diagonal elements of B are stored in d and the off-diagonal elements are
   874  // stored in e. These are additionally stored along the diagonal of A and the
   875  // off-diagonal of A. If m >= n B is an upper-bidiagonal matrix, and if m < n B
   876  // is a lower-bidiagonal matrix.
   877  //
   878  // The remaining elements of A store the data needed to construct Q and P.
   879  // The matrices Q and P are products of elementary reflectors
   880  //  if m >= n, Q = H_0 * H_1 * ... * H_{n-1},
   881  //             P = G_0 * G_1 * ... * G_{n-2},
   882  //  if m < n,  Q = H_0 * H_1 * ... * H_{m-2},
   883  //             P = G_0 * G_1 * ... * G_{m-1},
   884  // where
   885  //  H_i = I - tauQ[i] * v_i * v_i^T,
   886  //  G_i = I - tauP[i] * u_i * u_i^T.
   887  //
   888  // As an example, on exit the entries of A when m = 6, and n = 5
   889  //  [ d   e  u1  u1  u1]
   890  //  [v1   d   e  u2  u2]
   891  //  [v1  v2   d   e  u3]
   892  //  [v1  v2  v3   d   e]
   893  //  [v1  v2  v3  v4   d]
   894  //  [v1  v2  v3  v4  v5]
   895  // and when m = 5, n = 6
   896  //  [ d  u1  u1  u1  u1  u1]
   897  //  [ e   d  u2  u2  u2  u2]
   898  //  [v1   e   d  u3  u3  u3]
   899  //  [v1  v2   e   d  u4  u4]
   900  //  [v1  v2  v3   e   d  u5]
   901  //
   902  // d, tauQ, and tauP must all have length at least min(m,n), and e must have
   903  // length min(m,n) - 1, unless lwork is -1 when there is no check except for
   904  // work which must have a length of at least one.
   905  //
   906  // work is temporary storage, and lwork specifies the usable memory length.
   907  // At minimum, lwork >= max(1,m,n) or be -1 and this function will panic otherwise.
   908  // Dgebrd is blocked decomposition, but the block size is limited
   909  // by the temporary space available. If lwork == -1, instead of performing Dgebrd,
   910  // the optimal work length will be stored into work[0].
   911  //
   912  // Dgebrd is an internal routine. It is exported for testing purposes.
   913  func (impl Implementation) Dgebrd(m, n int, a []float64, lda int, d, e, tauQ, tauP, work []float64, lwork int) {
   914  	checkMatrix(m, n, a, lda)
   915  	minmn := min(m, n)
   916  	if len(d) < minmn {
   917  		panic(badD)
   918  	}
   919  	if len(e) < minmn-1 {
   920  		panic(badE)
   921  	}
   922  	if len(tauQ) < minmn {
   923  		panic(badTauQ)
   924  	}
   925  	if len(tauP) < minmn {
   926  		panic(badTauP)
   927  	}
   928  	if lwork != -1 && lwork < max(1, max(m, n)) {
   929  		panic(badWork)
   930  	}
   931  	if len(work) < max(1, lwork) {
   932  		panic(badWork)
   933  	}
   934  
   935  	lapacke.Dgebrd(m, n, a, lda, d, e, tauQ, tauP, work, lwork)
   936  }
   937  
   938  // Dgecon estimates the reciprocal of the condition number of the n×n matrix A
   939  // given the LU decomposition of the matrix. The condition number computed may
   940  // be based on the 1-norm or the ∞-norm.
   941  //
   942  // The slice a contains the result of the LU decomposition of A as computed by Dgetrf.
   943  //
   944  // anorm is the corresponding 1-norm or ∞-norm of the original matrix A.
   945  //
   946  // work is a temporary data slice of length at least 4*n and Dgecon will panic otherwise.
   947  //
   948  // iwork is a temporary data slice of length at least n and Dgecon will panic otherwise.
   949  // Elements of iwork must fit within the int32 type or Dgecon will panic.
   950  func (impl Implementation) Dgecon(norm lapack.MatrixNorm, n int, a []float64, lda int, anorm float64, work []float64, iwork []int) float64 {
   951  	checkMatrix(n, n, a, lda)
   952  	if norm != lapack.MaxColumnSum && norm != lapack.MaxRowSum {
   953  		panic("bad norm")
   954  	}
   955  	if len(work) < 4*n {
   956  		panic(badWork)
   957  	}
   958  	if len(iwork) < n {
   959  		panic(badWork)
   960  	}
   961  	rcond := make([]float64, 1)
   962  	_iwork := make([]int32, len(iwork))
   963  	for i, v := range iwork {
   964  		if v != int(int32(v)) {
   965  			panic("lapack: iwork element out of range")
   966  		}
   967  		_iwork[i] = int32(v)
   968  	}
   969  	lapacke.Dgecon(byte(norm), n, a, lda, anorm, rcond, work, _iwork)
   970  	for i, v := range _iwork {
   971  		iwork[i] = int(v)
   972  	}
   973  	return rcond[0]
   974  }
   975  
   976  // Dgelq2 computes the LQ factorization of the m×n matrix A.
   977  //
   978  // In an LQ factorization, L is a lower triangular m×n matrix, and Q is an n×n
   979  // orthonormal matrix.
   980  //
   981  // a is modified to contain the information to construct L and Q.
   982  // The lower triangle of a contains the matrix L. The upper triangular elements
   983  // (not including the diagonal) contain the elementary reflectors. Tau is modified
   984  // to contain the reflector scales. tau must have length of at least k = min(m,n)
   985  // and this function will panic otherwise.
   986  //
   987  // See Dgeqr2 for a description of the elementary reflectors and orthonormal
   988  // matrix Q. Q is constructed as a product of these elementary reflectors,
   989  //  Q = H_{k-1} * ... * H_1 * H_0,
   990  // where k = min(m,n).
   991  //
   992  // Work is temporary storage of length at least m and this function will panic otherwise.
   993  func (impl Implementation) Dgelq2(m, n int, a []float64, lda int, tau, work []float64) {
   994  	checkMatrix(m, n, a, lda)
   995  	if len(tau) < min(m, n) {
   996  		panic(badTau)
   997  	}
   998  	if len(work) < m {
   999  		panic(badWork)
  1000  	}
  1001  	lapacke.Dgelq2(m, n, a, lda, tau, work)
  1002  }
  1003  
  1004  // Dgelqf computes the LQ factorization of the m×n matrix A using a blocked
  1005  // algorithm. See the documentation for Dgelq2 for a description of the
  1006  // parameters at entry and exit.
  1007  //
  1008  // work is temporary storage, and lwork specifies the usable memory length.
  1009  // At minimum, lwork >= m, and this function will panic otherwise.
  1010  // Dgelqf is a blocked LQ factorization, but the block size is limited
  1011  // by the temporary space available. If lwork == -1, instead of performing Dgelqf,
  1012  // the optimal work length will be stored into work[0].
  1013  //
  1014  // tau must have length at least min(m,n), and this function will panic otherwise.
  1015  func (impl Implementation) Dgelqf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
  1016  	if lwork == -1 {
  1017  		work[0] = float64(m)
  1018  		return
  1019  	}
  1020  	checkMatrix(m, n, a, lda)
  1021  	if len(work) < lwork {
  1022  		panic(shortWork)
  1023  	}
  1024  	if lwork < m {
  1025  		panic(badWork)
  1026  	}
  1027  	if len(tau) < min(m, n) {
  1028  		panic(badTau)
  1029  	}
  1030  	lapacke.Dgelqf(m, n, a, lda, tau, work, lwork)
  1031  }
  1032  
  1033  // Dgeqr2 computes a QR factorization of the m×n matrix A.
  1034  //
  1035  // In a QR factorization, Q is an m×m orthonormal matrix, and R is an
  1036  // upper triangular m×n matrix.
  1037  //
  1038  // A is modified to contain the information to construct Q and R.
  1039  // The upper triangle of a contains the matrix R. The lower triangular elements
  1040  // (not including the diagonal) contain the elementary reflectors. Tau is modified
  1041  // to contain the reflector scales. tau must have length at least min(m,n), and
  1042  // this function will panic otherwise.
  1043  //
  1044  // The ith elementary reflector can be explicitly constructed by first extracting
  1045  // the
  1046  //  v[j] = 0           j < i
  1047  //  v[j] = 1           j == i
  1048  //  v[j] = a[j*lda+i]  j > i
  1049  // and computing H_i = I - tau[i] * v * v^T.
  1050  //
  1051  // The orthonormal matrix Q can be constucted from a product of these elementary
  1052  // reflectors, Q = H_0 * H_1 * ... * H_{k-1}, where k = min(m,n).
  1053  //
  1054  // Work is temporary storage of length at least n and this function will panic otherwise.
  1055  func (impl Implementation) Dgeqr2(m, n int, a []float64, lda int, tau, work []float64) {
  1056  	checkMatrix(m, n, a, lda)
  1057  	if len(work) < n {
  1058  		panic(badWork)
  1059  	}
  1060  	k := min(m, n)
  1061  	if len(tau) < k {
  1062  		panic(badTau)
  1063  	}
  1064  	lapacke.Dgeqr2(m, n, a, lda, tau, work)
  1065  }
  1066  
  1067  // Dgeqrf computes the QR factorization of the m×n matrix A using a blocked
  1068  // algorithm. See the documentation for Dgeqr2 for a description of the
  1069  // parameters at entry and exit.
  1070  //
  1071  // work is temporary storage, and lwork specifies the usable memory length.
  1072  // The length of work must be at least max(1, lwork) and lwork must be -1
  1073  // or at least n, otherwise this function will panic.
  1074  // Dgeqrf is a blocked QR factorization, but the block size is limited
  1075  // by the temporary space available. If lwork == -1, instead of performing Dgeqrf,
  1076  // the optimal work length will be stored into work[0].
  1077  //
  1078  // tau must have length at least min(m,n), and this function will panic otherwise.
  1079  func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
  1080  	if len(work) < max(1, lwork) {
  1081  		panic(shortWork)
  1082  	}
  1083  	if lwork == -1 {
  1084  		work[0] = float64(n)
  1085  		return
  1086  	}
  1087  	checkMatrix(m, n, a, lda)
  1088  	if lwork < n {
  1089  		panic(badWork)
  1090  	}
  1091  	k := min(m, n)
  1092  	if len(tau) < k {
  1093  		panic(badTau)
  1094  	}
  1095  	lapacke.Dgeqrf(m, n, a, lda, tau, work, lwork)
  1096  }
  1097  
  1098  // Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg
  1099  // form H by an orthogonal similarity transformation Q^T * A * Q = H.
  1100  //
  1101  // The matrix Q is represented as a product of (ihi-ilo) elementary
  1102  // reflectors
  1103  //  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
  1104  // Each H_i has the form
  1105  //  H_i = I - tau[i] * v * v^T
  1106  // where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0.
  1107  // v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i].
  1108  //
  1109  // On entry, a contains the n×n general matrix to be reduced. On return, the
  1110  // upper triangle and the first subdiagonal of A will be overwritten with the
  1111  // upper Hessenberg matrix H, and the elements below the first subdiagonal, with
  1112  // the slice tau, represent the orthogonal matrix Q as a product of elementary
  1113  // reflectors.
  1114  //
  1115  // The contents of a are illustrated by the following example, with n = 7, ilo =
  1116  // 1 and ihi = 5.
  1117  // On entry,
  1118  //  [ a   a   a   a   a   a   a ]
  1119  //  [     a   a   a   a   a   a ]
  1120  //  [     a   a   a   a   a   a ]
  1121  //  [     a   a   a   a   a   a ]
  1122  //  [     a   a   a   a   a   a ]
  1123  //  [     a   a   a   a   a   a ]
  1124  //  [                         a ]
  1125  // on return,
  1126  //  [ a   a   h   h   h   h   a ]
  1127  //  [     a   h   h   h   h   a ]
  1128  //  [     h   h   h   h   h   h ]
  1129  //  [     v1  h   h   h   h   h ]
  1130  //  [     v1  v2  h   h   h   h ]
  1131  //  [     v1  v2  v3  h   h   h ]
  1132  //  [                         a ]
  1133  // where a denotes an element of the original matrix A, h denotes a
  1134  // modified element of the upper Hessenberg matrix H, and vi denotes an
  1135  // element of the vector defining H_i.
  1136  //
  1137  // ilo and ihi determine the block of A that will be reduced to upper Hessenberg
  1138  // form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi ==
  1139  // -1 if n == 0, otherwise Dgehrd will panic.
  1140  //
  1141  // On return, tau will contain the scalar factors of the elementary reflectors.
  1142  // Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length
  1143  // equal to n-1 if n > 0, otherwise Dgehrd will panic.
  1144  //
  1145  // work must have length at least lwork and lwork must be at least max(1,n),
  1146  // otherwise Dgehrd will panic. On return, work[0] contains the optimal value of
  1147  // lwork.
  1148  //
  1149  // If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork
  1150  // will be stored in work[0].
  1151  //
  1152  // Dgehrd is an internal routine. It is exported for testing purposes.
  1153  func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
  1154  	switch {
  1155  	case ilo < 0 || max(0, n-1) < ilo:
  1156  		panic(badIlo)
  1157  	case ihi < min(ilo, n-1) || n <= ihi:
  1158  		panic(badIhi)
  1159  	case lwork < max(1, n) && lwork != -1:
  1160  		panic(badWork)
  1161  	case len(work) < lwork:
  1162  		panic(shortWork)
  1163  	}
  1164  	if lwork != -1 {
  1165  		checkMatrix(n, n, a, lda)
  1166  		if len(tau) != n-1 && n > 0 {
  1167  			panic(badTau)
  1168  		}
  1169  	}
  1170  	lapacke.Dgehrd(n, ilo+1, ihi+1, a, lda, tau, work, lwork)
  1171  }
  1172  
  1173  // Dgels finds a minimum-norm solution based on the matrices A and B using the
  1174  // QR or LQ factorization. Dgels returns false if the matrix
  1175  // A is singular, and true if this solution was successfully found.
  1176  //
  1177  // The minimization problem solved depends on the input parameters.
  1178  //
  1179  //  1. If m >= n and trans == blas.NoTrans, Dgels finds X such that || A*X - B||_2
  1180  //     is minimized.
  1181  //  2. If m < n and trans == blas.NoTrans, Dgels finds the minimum norm solution of
  1182  //     A * X = B.
  1183  //  3. If m >= n and trans == blas.Trans, Dgels finds the minimum norm solution of
  1184  //     A^T * X = B.
  1185  //  4. If m < n and trans == blas.Trans, Dgels finds X such that || A*X - B||_2
  1186  //     is minimized.
  1187  // Note that the least-squares solutions (cases 1 and 3) perform the minimization
  1188  // per column of B. This is not the same as finding the minimum-norm matrix.
  1189  //
  1190  // The matrix A is a general matrix of size m×n and is modified during this call.
  1191  // The input matrix B is of size max(m,n)×nrhs, and serves two purposes. On entry,
  1192  // the elements of b specify the input matrix B. B has size m×nrhs if
  1193  // trans == blas.NoTrans, and n×nrhs if trans == blas.Trans. On exit, the
  1194  // leading submatrix of b contains the solution vectors X. If trans == blas.NoTrans,
  1195  // this submatrix is of size n×nrhs, and of size m×nrhs otherwise.
  1196  //
  1197  // work is temporary storage, and lwork specifies the usable memory length.
  1198  // At minimum, lwork >= max(m,n) + max(m,n,nrhs), and this function will panic
  1199  // otherwise. A longer work will enable blocked algorithms to be called.
  1200  // In the special case that lwork == -1, work[0] will be set to the optimal working
  1201  // length.
  1202  func (impl Implementation) Dgels(trans blas.Transpose, m, n, nrhs int, a []float64, lda int, b []float64, ldb int, work []float64, lwork int) bool {
  1203  	mn := min(m, n)
  1204  	if lwork == -1 {
  1205  		work[0] = float64(mn + max(mn, nrhs))
  1206  		return true
  1207  	}
  1208  	checkMatrix(m, n, a, lda)
  1209  	checkMatrix(max(m, n), nrhs, b, ldb)
  1210  	if len(work) < lwork {
  1211  		panic(shortWork)
  1212  	}
  1213  	if lwork < mn+max(mn, nrhs) {
  1214  		panic(badWork)
  1215  	}
  1216  	return lapacke.Dgels(trans, m, n, nrhs, a, lda, b, ldb, work, lwork)
  1217  }
  1218  
  1219  const noSVDO = "dgesvd: not coded for overwrite"
  1220  
  1221  // Dgesvd computes the singular value decomposition of the input matrix A.
  1222  //
  1223  // The singular value decomposition is
  1224  //  A = U * Sigma * V^T
  1225  // where Sigma is an m×n diagonal matrix containing the singular values of A,
  1226  // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
  1227  // min(m,n) columns of U and V are the left and right singular vectors of A
  1228  // respectively.
  1229  //
  1230  // jobU and jobVT are options for computing the singular vectors. The behavior
  1231  // is as follows
  1232  //  jobU == lapack.SVDAll       All m columns of U are returned in u
  1233  //  jobU == lapack.SVDInPlace   The first min(m,n) columns are returned in u
  1234  //  jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
  1235  //  jobU == lapack.SVDNone      The columns of U are not computed.
  1236  // The behavior is the same for jobVT and the rows of V^T. At most one of jobU
  1237  // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
  1238  //
  1239  // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
  1240  // the data is overwritten. On exit, A contains the appropriate singular vectors
  1241  // if either job is lapack.SVDOverwrite.
  1242  //
  1243  // s is a slice of length at least min(m,n) and on exit contains the singular
  1244  // values in decreasing order.
  1245  //
  1246  // u contains the left singular vectors on exit, stored column-wise. If
  1247  // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is
  1248  // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
  1249  // not used.
  1250  //
  1251  // vt contains the left singular vectors on exit, stored rowwise. If
  1252  // jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDInPlace vt is
  1253  // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
  1254  // not used.
  1255  //
  1256  // work is a slice for storing temporary memory, and lwork is the usable size of
  1257  // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
  1258  // If lwork == -1, instead of performing Dgesvd, the optimal work length will be
  1259  // stored into work[0]. Dgesvd will panic if the working memory has insufficient
  1260  // storage.
  1261  //
  1262  // Dgesvd returns whether the decomposition successfully completed.
  1263  func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
  1264  	checkMatrix(m, n, a, lda)
  1265  	if jobU == lapack.SVDAll {
  1266  		checkMatrix(m, m, u, ldu)
  1267  	} else if jobU == lapack.SVDInPlace {
  1268  		checkMatrix(m, min(m, n), u, ldu)
  1269  	}
  1270  	if jobVT == lapack.SVDAll {
  1271  		checkMatrix(n, n, vt, ldvt)
  1272  	} else if jobVT == lapack.SVDInPlace {
  1273  		checkMatrix(min(m, n), n, vt, ldvt)
  1274  	}
  1275  	if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
  1276  		panic(noSVDO)
  1277  	}
  1278  	if len(s) < min(m, n) {
  1279  		panic(badS)
  1280  	}
  1281  	if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
  1282  		panic("lapack: SVD not coded to overwrite original matrix")
  1283  	}
  1284  	minWork := max(5*min(m, n), 3*min(m, n)+max(m, n))
  1285  	if lwork != -1 {
  1286  		if len(work) < lwork {
  1287  			panic(badWork)
  1288  		}
  1289  		if lwork < minWork {
  1290  			panic(badWork)
  1291  		}
  1292  	}
  1293  	if lwork == -1 {
  1294  		work[0] = float64(minWork)
  1295  		return true
  1296  	}
  1297  	return lapacke.Dgesvd(lapack.Job(jobU), lapack.Job(jobVT), m, n, a, lda, s, u, ldu, vt, ldvt, work, lwork)
  1298  }
  1299  
  1300  // Dgetf2 computes the LU decomposition of the m×n matrix A.
  1301  // The LU decomposition is a factorization of a into
  1302  //  A = P * L * U
  1303  // where P is a permutation matrix, L is a unit lower triangular matrix, and
  1304  // U is a (usually) non-unit upper triangular matrix. On exit, L and U are stored
  1305  // in place into a.
  1306  //
  1307  // ipiv is a permutation vector. It indicates that row i of the matrix was
  1308  // changed with ipiv[i]. ipiv must have length at least min(m,n), and will panic
  1309  // otherwise. ipiv is zero-indexed.
  1310  //
  1311  // Dgetf2 returns whether the matrix A is singular. The LU decomposition will
  1312  // be computed regardless of the singularity of A, but division by zero
  1313  // will occur if the false is returned and the result is used to solve a
  1314  // system of equations.
  1315  func (Implementation) Dgetf2(m, n int, a []float64, lda int, ipiv []int) (ok bool) {
  1316  	mn := min(m, n)
  1317  	checkMatrix(m, n, a, lda)
  1318  	if len(ipiv) < mn {
  1319  		panic(badIpiv)
  1320  	}
  1321  	ipiv32 := make([]int32, len(ipiv))
  1322  	ok = lapacke.Dgetf2(m, n, a, lda, ipiv32)
  1323  	for i, v := range ipiv32 {
  1324  		ipiv[i] = int(v) - 1 // Transform to zero-indexed.
  1325  	}
  1326  	return ok
  1327  }
  1328  
  1329  // Dgetrf computes the LU decomposition of the m×n matrix A.
  1330  // The LU decomposition is a factorization of A into
  1331  //  A = P * L * U
  1332  // where P is a permutation matrix, L is a unit lower triangular matrix, and
  1333  // U is a (usually) non-unit upper triangular matrix. On exit, L and U are stored
  1334  // in place into a.
  1335  //
  1336  // ipiv is a permutation vector. It indicates that row i of the matrix was
  1337  // changed with ipiv[i]. ipiv must have length at least min(m,n), and will panic
  1338  // otherwise. ipiv is zero-indexed.
  1339  //
  1340  // Dgetrf is the blocked version of the algorithm.
  1341  //
  1342  // Dgetrf returns whether the matrix A is singular. The LU decomposition will
  1343  // be computed regardless of the singularity of A, but division by zero
  1344  // will occur if the false is returned and the result is used to solve a
  1345  // system of equations.
  1346  func (impl Implementation) Dgetrf(m, n int, a []float64, lda int, ipiv []int) (ok bool) {
  1347  	mn := min(m, n)
  1348  	checkMatrix(m, n, a, lda)
  1349  	if len(ipiv) < mn {
  1350  		panic(badIpiv)
  1351  	}
  1352  	ipiv32 := make([]int32, len(ipiv))
  1353  	ok = lapacke.Dgetrf(m, n, a, lda, ipiv32)
  1354  	for i, v := range ipiv32 {
  1355  		ipiv[i] = int(v) - 1 // Transform to zero-indexed.
  1356  	}
  1357  	return ok
  1358  }
  1359  
  1360  // Dgetri computes the inverse of the matrix A using the LU factorization computed
  1361  // by Dgetrf. On entry, a contains the PLU decomposition of A as computed by
  1362  // Dgetrf and on exit contains the reciprocal of the original matrix.
  1363  //
  1364  // Dgetri will not perform the inversion if the matrix is singular, and returns
  1365  // a boolean indicating whether the inversion was successful.
  1366  //
  1367  // work is temporary storage, and lwork specifies the usable memory length.
  1368  // At minimum, lwork >= n and this function will panic otherwise.
  1369  // Dgetri is a blocked inversion, but the block size is limited
  1370  // by the temporary space available. If lwork == -1, instead of performing Dgetri,
  1371  // the optimal work length will be stored into work[0].
  1372  func (impl Implementation) Dgetri(n int, a []float64, lda int, ipiv []int, work []float64, lwork int) (ok bool) {
  1373  	checkMatrix(n, n, a, lda)
  1374  	if len(ipiv) < n {
  1375  		panic(badIpiv)
  1376  	}
  1377  	if lwork == -1 {
  1378  		work[0] = float64(n)
  1379  		return true
  1380  	}
  1381  	if lwork < n {
  1382  		panic(badWork)
  1383  	}
  1384  	if len(work) < lwork {
  1385  		panic(badWork)
  1386  	}
  1387  	ipiv32 := make([]int32, len(ipiv))
  1388  	for i, v := range ipiv {
  1389  		ipiv32[i] = int32(v) + 1 // Transform to one-indexed.
  1390  	}
  1391  	return lapacke.Dgetri(n, a, lda, ipiv32, work, lwork)
  1392  }
  1393  
  1394  // Dgetrs solves a system of equations using an LU factorization.
  1395  // The system of equations solved is
  1396  //  A * X = B if trans == blas.Trans
  1397  //  A^T * X = B if trans == blas.NoTrans
  1398  // A is a general n×n matrix with stride lda. B is a general matrix of size n×nrhs.
  1399  //
  1400  // On entry b contains the elements of the matrix B. On exit, b contains the
  1401  // elements of X, the solution to the system of equations.
  1402  //
  1403  // a and ipiv contain the LU factorization of A and the permutation indices as
  1404  // computed by Dgetrf. ipiv is zero-indexed.
  1405  func (impl Implementation) Dgetrs(trans blas.Transpose, n, nrhs int, a []float64, lda int, ipiv []int, b []float64, ldb int) {
  1406  	checkMatrix(n, n, a, lda)
  1407  	checkMatrix(n, nrhs, b, ldb)
  1408  	if len(ipiv) < n {
  1409  		panic(badIpiv)
  1410  	}
  1411  	ipiv32 := make([]int32, len(ipiv))
  1412  	for i, v := range ipiv {
  1413  		ipiv32[i] = int32(v) + 1 // Transform to one-indexed.
  1414  	}
  1415  	lapacke.Dgetrs(trans, n, nrhs, a, lda, ipiv32, b, ldb)
  1416  }
  1417  
  1418  // Dggsvd3 computes the generalized singular value decomposition (GSVD)
  1419  // of an m×n matrix A and p×n matrix B:
  1420  //  U^T*A*Q = D1*[ 0 R ]
  1421  //
  1422  //  V^T*B*Q = D2*[ 0 R ]
  1423  // where U, V and Q are orthogonal matrices.
  1424  //
  1425  // Dggsvd3 returns k and l, the dimensions of the sub-blocks. k+l
  1426  // is the effective numerical rank of the (m+p)×n matrix [ A^T B^T ]^T.
  1427  // R is a (k+l)×(k+l) nonsingular upper triangular matrix, D1 and
  1428  // D2 are m×(k+l) and p×(k+l) diagonal matrices and of the following
  1429  // structures, respectively:
  1430  //
  1431  // If m-k-l >= 0,
  1432  //
  1433  //                    k  l
  1434  //       D1 =     k [ I  0 ]
  1435  //                l [ 0  C ]
  1436  //            m-k-l [ 0  0 ]
  1437  //
  1438  //                  k  l
  1439  //       D2 = l   [ 0  S ]
  1440  //            p-l [ 0  0 ]
  1441  //
  1442  //               n-k-l  k    l
  1443  //  [ 0 R ] = k [  0   R11  R12 ] k
  1444  //            l [  0    0   R22 ] l
  1445  //
  1446  // where
  1447  //
  1448  //  C = diag( alpha_k, ... , alpha_{k+l} ),
  1449  //  S = diag( beta_k,  ... , beta_{k+l} ),
  1450  //  C^2 + S^2 = I.
  1451  //
  1452  // R is stored in
  1453  //  A[0:k+l, n-k-l:n]
  1454  // on exit.
  1455  //
  1456  // If m-k-l < 0,
  1457  //
  1458  //                 k m-k k+l-m
  1459  //      D1 =   k [ I  0    0  ]
  1460  //           m-k [ 0  C    0  ]
  1461  //
  1462  //                   k m-k k+l-m
  1463  //      D2 =   m-k [ 0  S    0  ]
  1464  //           k+l-m [ 0  0    I  ]
  1465  //             p-l [ 0  0    0  ]
  1466  //
  1467  //                 n-k-l  k   m-k  k+l-m
  1468  //  [ 0 R ] =    k [ 0    R11  R12  R13 ]
  1469  //             m-k [ 0     0   R22  R23 ]
  1470  //           k+l-m [ 0     0    0   R33 ]
  1471  //
  1472  // where
  1473  //  C = diag( alpha_k, ... , alpha_m ),
  1474  //  S = diag( beta_k,  ... , beta_m ),
  1475  //  C^2 + S^2 = I.
  1476  //
  1477  //  R = [ R11 R12 R13 ] is stored in A[1:m, n-k-l+1:n]
  1478  //      [  0  R22 R23 ]
  1479  // and R33 is stored in
  1480  //  B[m-k:l, n+m-k-l:n] on exit.
  1481  //
  1482  // Dggsvd3 computes C, S, R, and optionally the orthogonal transformation
  1483  // matrices U, V and Q.
  1484  //
  1485  // jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior
  1486  // is as follows
  1487  //  jobU == lapack.GSVDU        Compute orthogonal matrix U
  1488  //  jobU == lapack.GSVDNone     Do not compute orthogonal matrix.
  1489  // The behavior is the same for jobV and jobQ with the exception that instead of
  1490  // lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively.
  1491  // The matrices U, V and Q must be m×m, p×p and n×n respectively unless the
  1492  // relevant job parameter is lapack.GSVDNone.
  1493  //
  1494  // alpha and beta must have length n or Dggsvd3 will panic. On exit, alpha and
  1495  // beta contain the generalized singular value pairs of A and B
  1496  //   alpha[0:k] = 1,
  1497  //   beta[0:k]  = 0,
  1498  // if m-k-l >= 0,
  1499  //   alpha[k:k+l] = diag(C),
  1500  //   beta[k:k+l]  = diag(S),
  1501  // if m-k-l < 0,
  1502  //   alpha[k:m]= C, alpha[m:k+l]= 0
  1503  //   beta[k:m] = S, beta[m:k+l] = 1.
  1504  // if k+l < n,
  1505  //   alpha[k+l:n] = 0 and
  1506  //   beta[k+l:n]  = 0.
  1507  //
  1508  // On exit, iwork contains the permutation required to sort alpha descending.
  1509  //
  1510  // iwork must have length n, work must have length at least max(1, lwork), and
  1511  // lwork must be -1 or greater than n, otherwise Dggsvd3 will panic. If
  1512  // lwork is -1, work[0] holds the optimal lwork on return, but Dggsvd3 does
  1513  // not perform the GSVD.
  1514  func (impl Implementation) Dggsvd3(jobU, jobV, jobQ lapack.GSVDJob, m, n, p int, a []float64, lda int, b []float64, ldb int, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64, lwork int, iwork []int) (k, l int, ok bool) {
  1515  	checkMatrix(m, n, a, lda)
  1516  	checkMatrix(p, n, b, ldb)
  1517  
  1518  	switch jobU {
  1519  	case lapack.GSVDU:
  1520  		checkMatrix(m, m, u, ldu)
  1521  	case lapack.GSVDNone:
  1522  	default:
  1523  		panic(badGSVDJob + "U")
  1524  	}
  1525  	switch jobV {
  1526  	case lapack.GSVDV:
  1527  		checkMatrix(p, p, v, ldv)
  1528  	case lapack.GSVDNone:
  1529  	default:
  1530  		panic(badGSVDJob + "V")
  1531  	}
  1532  	switch jobQ {
  1533  	case lapack.GSVDQ:
  1534  		checkMatrix(n, n, q, ldq)
  1535  	case lapack.GSVDNone:
  1536  	default:
  1537  		panic(badGSVDJob + "Q")
  1538  	}
  1539  
  1540  	if len(alpha) != n {
  1541  		panic(badAlpha)
  1542  	}
  1543  	if len(beta) != n {
  1544  		panic(badBeta)
  1545  	}
  1546  
  1547  	if lwork != -1 && lwork <= n {
  1548  		panic(badWork)
  1549  	}
  1550  	if len(work) < max(1, lwork) {
  1551  		panic(shortWork)
  1552  	}
  1553  	if len(iwork) < n {
  1554  		panic(badWork)
  1555  	}
  1556  
  1557  	_k := []int32{0}
  1558  	_l := []int32{0}
  1559  	_iwork := make([]int32, len(iwork))
  1560  	for i, v := range iwork {
  1561  		v++
  1562  		if v != int(int32(v)) {
  1563  			panic("lapack: iwork element out of range")
  1564  		}
  1565  		_iwork[i] = int32(v)
  1566  	}
  1567  	ok = lapacke.Dggsvd3(lapack.Job(jobU), lapack.Job(jobV), lapack.Job(jobQ), m, n, p, _k, _l, a, lda, b, ldb, alpha, beta, u, ldu, v, ldv, q, ldq, work, lwork, _iwork)
  1568  	for i, v := range _iwork {
  1569  		iwork[i] = int(v - 1)
  1570  	}
  1571  
  1572  	return int(_k[0]), int(_l[0]), ok
  1573  }
  1574  
  1575  // Dggsvp3 computes orthogonal matrices U, V and Q such that
  1576  //
  1577  //                  n-k-l  k    l
  1578  //  U^T*A*Q =    k [ 0    A12  A13 ] if m-k-l >= 0;
  1579  //               l [ 0     0   A23 ]
  1580  //           m-k-l [ 0     0    0  ]
  1581  //
  1582  //                  n-k-l  k    l
  1583  //  U^T*A*Q =    k [ 0    A12  A13 ] if m-k-l < 0;
  1584  //             m-k [ 0     0   A23 ]
  1585  //
  1586  //                  n-k-l  k    l
  1587  //  V^T*B*Q =    l [ 0     0   B13 ]
  1588  //             p-l [ 0     0    0  ]
  1589  //
  1590  // where the k×k matrix A12 and l×l matrix B13 are non-singular
  1591  // upper triangular. A23 is l×l upper triangular if m-k-l >= 0,
  1592  // otherwise A23 is (m-k)×l upper trapezoidal.
  1593  //
  1594  // Dggsvp3 returns k and l, the dimensions of the sub-blocks. k+l
  1595  // is the effective numerical rank of the (m+p)×n matrix [ A^T B^T ]^T.
  1596  //
  1597  // jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior
  1598  // is as follows
  1599  //  jobU == lapack.GSVDU        Compute orthogonal matrix U
  1600  //  jobU == lapack.GSVDNone     Do not compute orthogonal matrix.
  1601  // The behavior is the same for jobV and jobQ with the exception that instead of
  1602  // lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively.
  1603  // The matrices U, V and Q must be m×m, p×p and n×n respectively unless the
  1604  // relevant job parameter is lapack.GSVDNone.
  1605  //
  1606  // tola and tolb are the convergence criteria for the Jacobi-Kogbetliantz
  1607  // iteration procedure. Generally, they are the same as used in the preprocessing
  1608  // step, for example,
  1609  //  tola = max(m, n)*norm(A)*eps,
  1610  //  tolb = max(p, n)*norm(B)*eps.
  1611  // Where eps is the machine epsilon.
  1612  //
  1613  // iwork must have length n, work must have length at least max(1, lwork), and
  1614  // lwork must be -1 or greater than zero, otherwise Dggsvp3 will panic.
  1615  //
  1616  // Dggsvp3 is an internal routine. It is exported for testing purposes.
  1617  func (impl Implementation) Dggsvp3(jobU, jobV, jobQ lapack.GSVDJob, m, p, n int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, iwork []int, tau, work []float64, lwork int) (k, l int) {
  1618  	checkMatrix(m, n, a, lda)
  1619  	checkMatrix(p, n, b, ldb)
  1620  
  1621  	wantu := jobU == lapack.GSVDU
  1622  	if !wantu && jobU != lapack.GSVDNone {
  1623  		panic(badGSVDJob + "U")
  1624  	}
  1625  	if jobU != lapack.GSVDNone {
  1626  		checkMatrix(m, m, u, ldu)
  1627  	}
  1628  
  1629  	wantv := jobV == lapack.GSVDV
  1630  	if !wantv && jobV != lapack.GSVDNone {
  1631  		panic(badGSVDJob + "V")
  1632  	}
  1633  	if jobV != lapack.GSVDNone {
  1634  		checkMatrix(p, p, v, ldv)
  1635  	}
  1636  
  1637  	wantq := jobQ == lapack.GSVDQ
  1638  	if !wantq && jobQ != lapack.GSVDNone {
  1639  		panic(badGSVDJob + "Q")
  1640  	}
  1641  	if jobQ != lapack.GSVDNone {
  1642  		checkMatrix(n, n, q, ldq)
  1643  	}
  1644  
  1645  	if len(tau) < n {
  1646  		panic(badTau)
  1647  	}
  1648  	if len(iwork) != n {
  1649  		panic(badWork)
  1650  	}
  1651  	if lwork != -1 && lwork < 1 {
  1652  		panic(badWork)
  1653  	}
  1654  	if len(work) < max(1, lwork) {
  1655  		panic(shortWork)
  1656  	}
  1657  
  1658  	_k := []int32{0}
  1659  	_l := []int32{0}
  1660  	_iwork := make([]int32, len(iwork))
  1661  	for i, v := range iwork {
  1662  		v++
  1663  		if v != int(int32(v)) {
  1664  			panic("lapack: iwork element out of range")
  1665  		}
  1666  		_iwork[i] = int32(v)
  1667  	}
  1668  	lapacke.Dggsvp3(lapack.Job(jobU), lapack.Job(jobV), lapack.Job(jobQ), m, p, n, a, lda, b, ldb, tola, tolb, _k, _l, u, ldu, v, ldv, q, ldq, _iwork, tau, work, lwork)
  1669  	for i, v := range _iwork {
  1670  		iwork[i] = int(v - 1)
  1671  	}
  1672  
  1673  	return int(_k[0]), int(_l[0])
  1674  }
  1675  
  1676  // Dorgbr generates one of the matrices Q or P^T computed by Dgebrd.
  1677  // See Dgebrd for the description of Q and P^T.
  1678  //
  1679  // If vect == lapack.ApplyQ, then a is assumed to have been an m×k matrix and
  1680  // Q is of order m. If m >= k, then Dorgbr returns the first n columns of Q
  1681  // where m >= n >= k. If m < k, then Dorgbr returns Q as an m×m matrix.
  1682  //
  1683  // If vect == lapack.ApplyP, then A is assumed to have been a k×n matrix, and
  1684  // P^T is of order n. If k < n, then Dorgbr returns the first m rows of P^T,
  1685  // where n >= m >= k. If k >= n, then Dorgbr returns P^T as an n×n matrix.
  1686  func (impl Implementation) Dorgbr(vect lapack.DecompUpdate, m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
  1687  	mn := min(m, n)
  1688  	wantq := vect == lapack.ApplyQ
  1689  	if wantq {
  1690  		if m < n || n < min(m, k) || m < min(m, k) {
  1691  			panic(badDims)
  1692  		}
  1693  	} else {
  1694  		if n < m || m < min(n, k) || n < min(n, k) {
  1695  			panic(badDims)
  1696  		}
  1697  	}
  1698  	if wantq {
  1699  		checkMatrix(m, k, a, lda)
  1700  	} else {
  1701  		checkMatrix(k, n, a, lda)
  1702  	}
  1703  	if lwork == -1 {
  1704  		work[0] = float64(mn)
  1705  		return
  1706  	}
  1707  	if len(work) < lwork {
  1708  		panic(badWork)
  1709  	}
  1710  	if lwork < mn {
  1711  		panic(badWork)
  1712  	}
  1713  	lapacke.Dorgbr(byte(vect), m, n, k, a, lda, tau, work, lwork)
  1714  }
  1715  
  1716  // Dorghr generates an n×n orthogonal matrix Q which is defined as the product
  1717  // of ihi-ilo elementary reflectors:
  1718  //  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
  1719  //
  1720  // a and lda represent an n×n matrix that contains the elementary reflectors, as
  1721  // returned by Dgehrd. On return, a is overwritten by the n×n orthogonal matrix
  1722  // Q. Q will be equal to the identity matrix except in the submatrix
  1723  // Q[ilo+1:ihi+1,ilo+1:ihi+1].
  1724  //
  1725  // ilo and ihi must have the same values as in the previous call of Dgehrd. It
  1726  // must hold that
  1727  //  0 <= ilo <= ihi < n,  if n > 0,
  1728  //  ilo = 0, ihi = -1,    if n == 0.
  1729  //
  1730  // tau contains the scalar factors of the elementary reflectors, as returned by
  1731  // Dgehrd. tau must have length n-1.
  1732  //
  1733  // work must have length at least max(1,lwork) and lwork must be at least
  1734  // ihi-ilo. For optimum performance lwork must be at least (ihi-ilo)*nb where nb
  1735  // is the optimal blocksize. On return, work[0] will contain the optimal value
  1736  // of lwork.
  1737  //
  1738  // If lwork == -1, instead of performing Dorghr, only the optimal value of lwork
  1739  // will be stored into work[0].
  1740  //
  1741  // If any requirement on input sizes is not met, Dorghr will panic.
  1742  //
  1743  // Dorghr is an internal routine. It is exported for testing purposes.
  1744  func (impl Implementation) Dorghr(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
  1745  	checkMatrix(n, n, a, lda)
  1746  	nh := ihi - ilo
  1747  	switch {
  1748  	case ilo < 0 || max(1, n) <= ilo:
  1749  		panic(badIlo)
  1750  	case ihi < min(ilo, n-1) || n <= ihi:
  1751  		panic(badIhi)
  1752  	case lwork < max(1, nh) && lwork != -1:
  1753  		panic(badWork)
  1754  	case len(work) < max(1, lwork):
  1755  		panic(shortWork)
  1756  	}
  1757  	lapacke.Dorghr(n, ilo+1, ihi+1, a, lda, tau, work, lwork)
  1758  }
  1759  
  1760  // Dorglq generates an m×n matrix Q with orthonormal rows defined by the product
  1761  // of elementary reflectors
  1762  //  Q = H_{k-1} * ... * H_1 * H_0
  1763  // as computed by Dgelqf. Dorglq is the blocked version of Dorgl2 that makes
  1764  // greater use of level-3 BLAS routines.
  1765  //
  1766  // len(tau) >= k, 0 <= k <= n, and 0 <= m <= n.
  1767  //
  1768  // work is temporary storage, and lwork specifies the usable memory length. At minimum,
  1769  // lwork >= m, and the amount of blocking is limited by the usable length.
  1770  // If lwork == -1, instead of computing Dorglq the optimal work length is stored
  1771  // into work[0].
  1772  //
  1773  // Dorglq will panic if the conditions on input values are not met.
  1774  //
  1775  // Dorglq is an internal routine. It is exported for testing purposes.
  1776  func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
  1777  	if lwork == -1 {
  1778  		work[0] = float64(m)
  1779  		return
  1780  	}
  1781  	checkMatrix(m, n, a, lda)
  1782  	if k < 0 {
  1783  		panic(kLT0)
  1784  	}
  1785  	if k > m {
  1786  		panic(kGTM)
  1787  	}
  1788  	if m > n {
  1789  		panic(nLTM)
  1790  	}
  1791  	if len(tau) < k {
  1792  		panic(badTau)
  1793  	}
  1794  	if len(work) < lwork {
  1795  		panic(shortWork)
  1796  	}
  1797  	if lwork < m {
  1798  		panic(badWork)
  1799  	}
  1800  	lapacke.Dorglq(m, n, k, a, lda, tau, work, lwork)
  1801  }
  1802  
  1803  // Dorgql generates the m×n matrix Q with orthonormal columns defined as the
  1804  // last n columns of a product of k elementary reflectors of order m
  1805  //  Q = H_{k-1} * ... * H_1 * H_0.
  1806  //
  1807  // It must hold that
  1808  //  0 <= k <= n <= m,
  1809  // and Dorgql will panic otherwise.
  1810  //
  1811  // On entry, the (n-k+i)-th column of A must contain the vector which defines
  1812  // the elementary reflector H_i, for i=0,...,k-1, and tau[i] must contain its
  1813  // scalar factor. On return, a contains the m×n matrix Q.
  1814  //
  1815  // tau must have length at least k, and Dorgql will panic otherwise.
  1816  //
  1817  // work must have length at least max(1,lwork), and lwork must be at least
  1818  // max(1,n), otherwise Dorgql will panic. For optimum performance lwork must
  1819  // be a sufficiently large multiple of n.
  1820  //
  1821  // If lwork == -1, instead of computing Dorgql the optimal work length is stored
  1822  // into work[0].
  1823  //
  1824  // Dorgql is an internal routine. It is exported for testing purposes.
  1825  func (impl Implementation) Dorgql(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
  1826  	switch {
  1827  	case n < 0:
  1828  		panic(nLT0)
  1829  	case m < n:
  1830  		panic(mLTN)
  1831  	case k < 0:
  1832  		panic(kLT0)
  1833  	case k > n:
  1834  		panic(kGTN)
  1835  	case lwork < max(1, n) && lwork != -1:
  1836  		panic(badWork)
  1837  	case len(work) < lwork:
  1838  		panic(shortWork)
  1839  	}
  1840  	if lwork != -1 {
  1841  		checkMatrix(m, n, a, lda)
  1842  		if len(tau) < k {
  1843  			panic(badTau)
  1844  		}
  1845  	}
  1846  
  1847  	lapacke.Dorgql(m, n, k, a, lda, tau, work, lwork)
  1848  }
  1849  
  1850  // Dorgqr generates an m×n matrix Q with orthonormal columns defined by the
  1851  // product of elementary reflectors
  1852  //  Q = H_0 * H_1 * ... * H_{k-1}
  1853  // as computed by Dgeqrf. Dorgqr is the blocked version of Dorg2r that makes
  1854  // greater use of level-3 BLAS routines.
  1855  //
  1856  // The length of tau must be at least k, and the length of work must be at least n.
  1857  // It also must be that 0 <= k <= n and 0 <= n <= m.
  1858  //
  1859  // work is temporary storage, and lwork specifies the usable memory length. At
  1860  // minimum, lwork >= n, and the amount of blocking is limited by the usable
  1861  // length. If lwork == -1, instead of computing Dorgqr the optimal work length
  1862  // is stored into work[0].
  1863  //
  1864  // Dorgqr will panic if the conditions on input values are not met.
  1865  //
  1866  // Dorgqr is an internal routine. It is exported for testing purposes.
  1867  func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
  1868  	if lwork == -1 {
  1869  		work[0] = float64(n)
  1870  		return
  1871  	}
  1872  	checkMatrix(m, n, a, lda)
  1873  	if k < 0 {
  1874  		panic(kLT0)
  1875  	}
  1876  	if k > n {
  1877  		panic(kGTN)
  1878  	}
  1879  	if n > m {
  1880  		panic(mLTN)
  1881  	}
  1882  	if len(tau) < k {
  1883  		panic(badTau)
  1884  	}
  1885  	if len(work) < lwork {
  1886  		panic(shortWork)
  1887  	}
  1888  	if lwork < n {
  1889  		panic(badWork)
  1890  	}
  1891  	lapacke.Dorgqr(m, n, k, a, lda, tau, work, lwork)
  1892  }
  1893  
  1894  // Dorgtr generates a real orthogonal matrix Q which is defined as the product
  1895  // of n-1 elementary reflectors of order n as returned by Dsytrd.
  1896  //
  1897  // The construction of Q depends on the value of uplo:
  1898  //  Q = H_{n-1} * ... * H_1 * H_0  if uplo == blas.Upper
  1899  //  Q = H_0 * H_1 * ... * H_{n-1}  if uplo == blas.Lower
  1900  // where H_i is constructed from the elementary reflectors as computed by Dsytrd.
  1901  // See the documentation for Dsytrd for more information.
  1902  //
  1903  // tau must have length at least n-1, and Dorgtr will panic otherwise.
  1904  //
  1905  // work is temporary storage, and lwork specifies the usable memory length. At
  1906  // minimum, lwork >= max(1,n-1), and Dorgtr will panic otherwise. The amount of blocking
  1907  // is limited by the usable length.
  1908  // If lwork == -1, instead of computing Dorgtr the optimal work length is stored
  1909  // into work[0].
  1910  //
  1911  // Dorgtr is an internal routine. It is exported for testing purposes.
  1912  func (impl Implementation) Dorgtr(uplo blas.Uplo, n int, a []float64, lda int, tau, work []float64, lwork int) {
  1913  	checkMatrix(n, n, a, lda)
  1914  	if len(tau) < n-1 {
  1915  		panic(badTau)
  1916  	}
  1917  	if len(work) < lwork {
  1918  		panic(badWork)
  1919  	}
  1920  	if lwork < n-1 && lwork != -1 {
  1921  		panic(badWork)
  1922  	}
  1923  	upper := uplo == blas.Upper
  1924  	if !upper && uplo != blas.Lower {
  1925  		panic(badUplo)
  1926  	}
  1927  	lapacke.Dorgtr(uplo, n, a, lda, tau, work, lwork)
  1928  }
  1929  
  1930  // Dormbr applies a multiplicative update to the matrix C based on a
  1931  // decomposition computed by Dgebrd.
  1932  //
  1933  // Dormbr overwrites the m×n matrix C with
  1934  //  Q * C   if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.NoTrans
  1935  //  C * Q   if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.NoTrans
  1936  //  Q^T * C if vect == lapack.ApplyQ, side == blas.Left, and trans == blas.Trans
  1937  //  C * Q^T if vect == lapack.ApplyQ, side == blas.Right, and trans == blas.Trans
  1938  //
  1939  //  P * C   if vect == lapack.ApplyP, side == blas.Left, and trans == blas.NoTrans
  1940  //  C * P   if vect == lapack.ApplyP, side == blas.Right, and trans == blas.NoTrans
  1941  //  P^T * C if vect == lapack.ApplyP, side == blas.Left, and trans == blas.Trans
  1942  //  C * P^T if vect == lapack.ApplyP, side == blas.Right, and trans == blas.Trans
  1943  // where P and Q are the orthogonal matrices determined by Dgebrd when reducing
  1944  // a matrix A to bidiagonal form: A = Q * B * P^T. See Dgebrd for the
  1945  // definitions of Q and P.
  1946  //
  1947  // If vect == lapack.ApplyQ, A is assumed to have been an nq×k matrix, while if
  1948  // vect == lapack.ApplyP, A is assumed to have been a k×nq matrix. nq = m if
  1949  // side == blas.Left, while nq = n if side == blas.Right.
  1950  //
  1951  // tau must have length min(nq,k), and Dormbr will panic otherwise. tau contains
  1952  // the elementary reflectors to construct Q or P depending on the value of
  1953  // vect.
  1954  //
  1955  // work must have length at least max(1,lwork), and lwork must be either -1 or
  1956  // at least max(1,n) if side == blas.Left, and at least max(1,m) if side ==
  1957  // blas.Right. For optimum performance lwork should be at least n*nb if side ==
  1958  // blas.Left, and at least m*nb if side == blas.Right, where nb is the optimal
  1959  // block size. On return, work[0] will contain the optimal value of lwork.
  1960  //
  1961  // If lwork == -1, the function only calculates the optimal value of lwork and
  1962  // returns it in work[0].
  1963  //
  1964  // Dormbr is an internal routine. It is exported for testing purposes.
  1965  func (impl Implementation) Dormbr(vect lapack.DecompUpdate, side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
  1966  	if side != blas.Left && side != blas.Right {
  1967  		panic(badSide)
  1968  	}
  1969  	if trans != blas.NoTrans && trans != blas.Trans {
  1970  		panic(badTrans)
  1971  	}
  1972  	if vect != lapack.ApplyP && vect != lapack.ApplyQ {
  1973  		panic(badDecompUpdate)
  1974  	}
  1975  	nq := n
  1976  	nw := m
  1977  	if side == blas.Left {
  1978  		nq = m
  1979  		nw = n
  1980  	}
  1981  	if vect == lapack.ApplyQ {
  1982  		checkMatrix(nq, min(nq, k), a, lda)
  1983  	} else {
  1984  		checkMatrix(min(nq, k), nq, a, lda)
  1985  	}
  1986  	if len(tau) < min(nq, k) {
  1987  		panic(badTau)
  1988  	}
  1989  	checkMatrix(m, n, c, ldc)
  1990  	if len(work) < lwork {
  1991  		panic(shortWork)
  1992  	}
  1993  	if lwork < max(1, nw) && lwork != -1 {
  1994  		panic(badWork)
  1995  	}
  1996  	lapacke.Dormbr(byte(vect), side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork)
  1997  }
  1998  
  1999  // Dormhr multiplies an m×n general matrix C with an nq×nq orthogonal matrix Q
  2000  //  Q * C,    if side == blas.Left and trans == blas.NoTrans,
  2001  //  Q^T * C,  if side == blas.Left and trans == blas.Trans,
  2002  //  C * Q,    if side == blas.Right and trans == blas.NoTrans,
  2003  //  C * Q^T,  if side == blas.Right and trans == blas.Trans,
  2004  // where nq == m if side == blas.Left and nq == n if side == blas.Right.
  2005  //
  2006  // Q is defined implicitly as the product of ihi-ilo elementary reflectors, as
  2007  // returned by Dgehrd:
  2008  //  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
  2009  // Q is equal to the identity matrix except in the submatrix
  2010  // Q[ilo+1:ihi+1,ilo+1:ihi+1].
  2011  //
  2012  // ilo and ihi must have the same values as in the previous call of Dgehrd. It
  2013  // must hold that
  2014  //  0 <= ilo <= ihi < m,   if m > 0 and side == blas.Left,
  2015  //  ilo = 0 and ihi = -1,  if m = 0 and side == blas.Left,
  2016  //  0 <= ilo <= ihi < n,   if n > 0 and side == blas.Right,
  2017  //  ilo = 0 and ihi = -1,  if n = 0 and side == blas.Right.
  2018  //
  2019  // a and lda represent an m×m matrix if side == blas.Left and an n×n matrix if
  2020  // side == blas.Right. The matrix contains vectors which define the elementary
  2021  // reflectors, as returned by Dgehrd.
  2022  //
  2023  // tau contains the scalar factors of the elementary reflectors, as returned by
  2024  // Dgehrd. tau must have length m-1 if side == blas.Left and n-1 if side ==
  2025  // blas.Right.
  2026  //
  2027  // c and ldc represent the m×n matrix C. On return, c is overwritten by the
  2028  // product with Q.
  2029  //
  2030  // work must have length at least max(1,lwork), and lwork must be at least
  2031  // max(1,n), if side == blas.Left, and max(1,m), if side == blas.Right. For
  2032  // optimum performance lwork should be at least n*nb if side == blas.Left and
  2033  // m*nb if side == blas.Right, where nb is the optimal block size. On return,
  2034  // work[0] will contain the optimal value of lwork.
  2035  //
  2036  // If lwork == -1, instead of performing Dormhr, only the optimal value of lwork
  2037  // will be stored in work[0].
  2038  //
  2039  // If any requirement on input sizes is not met, Dormhr will panic.
  2040  //
  2041  // Dormhr is an internal routine. It is exported for testing purposes.
  2042  func (impl Implementation) Dormhr(side blas.Side, trans blas.Transpose, m, n, ilo, ihi int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
  2043  	var (
  2044  		nq int // The order of Q.
  2045  		nw int // The minimum length of work.
  2046  	)
  2047  	switch side {
  2048  	case blas.Left:
  2049  		nq = m
  2050  		nw = n
  2051  	case blas.Right:
  2052  		nq = n
  2053  		nw = m
  2054  	default:
  2055  		panic(badSide)
  2056  	}
  2057  	switch {
  2058  	case trans != blas.NoTrans && trans != blas.Trans:
  2059  		panic(badTrans)
  2060  	case ilo < 0 || max(1, nq) <= ilo:
  2061  		panic(badIlo)
  2062  	case ihi < min(ilo, nq-1) || nq <= ihi:
  2063  		panic(badIhi)
  2064  	case lwork < max(1, nw) && lwork != -1:
  2065  		panic(badWork)
  2066  	case len(work) < max(1, lwork):
  2067  		panic(shortWork)
  2068  	}
  2069  	if lwork != -1 {
  2070  		checkMatrix(m, n, c, ldc)
  2071  		checkMatrix(nq, nq, a, lda)
  2072  		if len(tau) != nq-1 && nq > 0 {
  2073  			panic(badTau)
  2074  		}
  2075  	}
  2076  	lapacke.Dormhr(side, trans, m, n, ilo+1, ihi+1, a, lda, tau, c, ldc, work, lwork)
  2077  }
  2078  
  2079  // Dormlq multiplies the matrix C by the orthogonal matrix Q defined by the
  2080  // slices a and tau. A and tau are as returned from Dgelqf.
  2081  //  C = Q * C    if side == blas.Left and trans == blas.NoTrans
  2082  //  C = Q^T * C  if side == blas.Left and trans == blas.Trans
  2083  //  C = C * Q    if side == blas.Right and trans == blas.NoTrans
  2084  //  C = C * Q^T  if side == blas.Right and trans == blas.Trans
  2085  // If side == blas.Left, A is a matrix of side k×m, and if side == blas.Right
  2086  // A is of size k×n. This uses a blocked algorithm.
  2087  //
  2088  // Work is temporary storage, and lwork specifies the usable memory length.
  2089  // At minimum, lwork >= m if side == blas.Left and lwork >= n if side == blas.Right,
  2090  // and this function will panic otherwise.
  2091  // Dormlq uses a block algorithm, but the block size is limited
  2092  // by the temporary space available. If lwork == -1, instead of performing Dormlq,
  2093  // the optimal work length will be stored into work[0].
  2094  //
  2095  // tau contains the Householder scales and must have length at least k, and
  2096  // this function will panic otherwise.
  2097  func (impl Implementation) Dormlq(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
  2098  	if side != blas.Left && side != blas.Right {
  2099  		panic(badSide)
  2100  	}
  2101  	if trans != blas.Trans && trans != blas.NoTrans {
  2102  		panic(badTrans)
  2103  	}
  2104  	left := side == blas.Left
  2105  	if left {
  2106  		checkMatrix(k, m, a, lda)
  2107  	} else {
  2108  		checkMatrix(k, n, a, lda)
  2109  	}
  2110  	checkMatrix(m, n, c, ldc)
  2111  	if len(tau) < k {
  2112  		panic(badTau)
  2113  	}
  2114  	if len(work) < lwork {
  2115  		panic(shortWork)
  2116  	}
  2117  	nw := m
  2118  	if left {
  2119  		nw = n
  2120  	}
  2121  	if lwork < max(1, nw) && lwork != -1 {
  2122  		panic(badWork)
  2123  	}
  2124  
  2125  	lapacke.Dormlq(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork)
  2126  }
  2127  
  2128  // Dormqr multiplies an m×n matrix C by an orthogonal matrix Q as
  2129  //  C = Q * C,    if side == blas.Left  and trans == blas.NoTrans,
  2130  //  C = Q^T * C,  if side == blas.Left  and trans == blas.Trans,
  2131  //  C = C * Q,    if side == blas.Right and trans == blas.NoTrans,
  2132  //  C = C * Q^T,  if side == blas.Right and trans == blas.Trans,
  2133  // where Q is defined as the product of k elementary reflectors
  2134  //  Q = H_0 * H_1 * ... * H_{k-1}.
  2135  //
  2136  // If side == blas.Left, A is an m×k matrix and 0 <= k <= m.
  2137  // If side == blas.Right, A is an n×k matrix and 0 <= k <= n.
  2138  // The ith column of A contains the vector which defines the elementary
  2139  // reflector H_i and tau[i] contains its scalar factor. tau must have length k
  2140  // and Dormqr will panic otherwise. Dgeqrf returns A and tau in the required
  2141  // form.
  2142  //
  2143  // work must have length at least max(1,lwork), and lwork must be at least n if
  2144  // side == blas.Left and at least m if side == blas.Right, otherwise Dormqr will
  2145  // panic.
  2146  //
  2147  // work is temporary storage, and lwork specifies the usable memory length. At
  2148  // minimum, lwork >= m if side == blas.Left and lwork >= n if side ==
  2149  // blas.Right, and this function will panic otherwise. Larger values of lwork
  2150  // will generally give better performance. On return, work[0] will contain the
  2151  // optimal value of lwork.
  2152  //
  2153  // If lwork is -1, instead of performing Dormqr, the optimal workspace size will
  2154  // be stored into work[0].
  2155  func (impl Implementation) Dormqr(side blas.Side, trans blas.Transpose, m, n, k int, a []float64, lda int, tau, c []float64, ldc int, work []float64, lwork int) {
  2156  	var nq, nw int
  2157  	switch side {
  2158  	default:
  2159  		panic(badSide)
  2160  	case blas.Left:
  2161  		nq = m
  2162  		nw = n
  2163  	case blas.Right:
  2164  		nq = n
  2165  		nw = m
  2166  	}
  2167  	switch {
  2168  	case trans != blas.NoTrans && trans != blas.Trans:
  2169  		panic(badTrans)
  2170  	case m < 0 || n < 0:
  2171  		panic(negDimension)
  2172  	case k < 0 || nq < k:
  2173  		panic("lapack: invalid value of k")
  2174  	case len(work) < lwork:
  2175  		panic(shortWork)
  2176  	case lwork < max(1, nw) && lwork != -1:
  2177  		panic(badWork)
  2178  	}
  2179  	if lwork != -1 {
  2180  		checkMatrix(nq, k, a, lda)
  2181  		checkMatrix(m, n, c, ldc)
  2182  		if len(tau) != k {
  2183  			panic(badTau)
  2184  		}
  2185  	}
  2186  
  2187  	lapacke.Dormqr(side, trans, m, n, k, a, lda, tau, c, ldc, work, lwork)
  2188  }
  2189  
  2190  // Dpocon estimates the reciprocal of the condition number of a positive-definite
  2191  // matrix A given the Cholesky decomposition of A. The condition number computed
  2192  // is based on the 1-norm and the ∞-norm.
  2193  //
  2194  // anorm is the 1-norm and the ∞-norm of the original matrix A.
  2195  //
  2196  // work is a temporary data slice of length at least 3*n and Dpocon will panic otherwise.
  2197  //
  2198  // iwork is a temporary data slice of length at least n and Dpocon will panic otherwise.
  2199  // Elements of iwork must fit within the int32 type or Dpocon will panic.
  2200  func (impl Implementation) Dpocon(uplo blas.Uplo, n int, a []float64, lda int, anorm float64, work []float64, iwork []int) float64 {
  2201  	checkMatrix(n, n, a, lda)
  2202  	if uplo != blas.Upper && uplo != blas.Lower {
  2203  		panic(badUplo)
  2204  	}
  2205  	if len(work) < 3*n {
  2206  		panic(badWork)
  2207  	}
  2208  	if len(iwork) < n {
  2209  		panic(badWork)
  2210  	}
  2211  	rcond := make([]float64, 1)
  2212  	_iwork := make([]int32, len(iwork))
  2213  	for i, v := range iwork {
  2214  		if v != int(int32(v)) {
  2215  			panic("lapack: iwork element out of range")
  2216  		}
  2217  		_iwork[i] = int32(v)
  2218  	}
  2219  	lapacke.Dpocon(uplo, n, a, lda, anorm, rcond, work, _iwork)
  2220  	for i, v := range _iwork {
  2221  		iwork[i] = int(v)
  2222  	}
  2223  	return rcond[0]
  2224  }
  2225  
  2226  // Dsteqr computes the eigenvalues and optionally the eigenvectors of a symmetric
  2227  // tridiagonal matrix using the implicit QL or QR method. The eigenvectors of a
  2228  // full or band symmetric matrix can also be found if Dsytrd, Dsptrd, or Dsbtrd
  2229  // have been used to reduce this matrix to tridiagonal form.
  2230  //
  2231  // d, on entry, contains the diagonal elements of the tridiagonal matrix. On exit,
  2232  // d contains the eigenvalues in ascending order. d must have length n and
  2233  // Dsteqr will panic otherwise.
  2234  //
  2235  // e, on entry, contains the off-diagonal elements of the tridiagonal matrix on
  2236  // entry, and is overwritten during the call to Dsteqr. e must have length n-1 and
  2237  // Dsteqr will panic otherwise.
  2238  //
  2239  // z, on entry, contains the n×n orthogonal matrix used in the reduction to
  2240  // tridiagonal form if compz == lapack.OriginalEV. On exit, if
  2241  // compz == lapack.OriginalEV, z contains the orthonormal eigenvectors of the
  2242  // original symmetric matrix, and if compz == lapack.TridiagEV, z contains the
  2243  // orthonormal eigenvectors of the symmetric tridiagonal matrix. z is not used
  2244  // if compz == lapack.None.
  2245  //
  2246  // work must have length at least max(1, 2*n-2) if the eigenvectors are computed,
  2247  // and Dsteqr will panic otherwise.
  2248  //
  2249  // Dsteqr is an internal routine. It is exported for testing purposes.
  2250  func (impl Implementation) Dsteqr(compz lapack.EVComp, n int, d, e, z []float64, ldz int, work []float64) (ok bool) {
  2251  	if n < 0 {
  2252  		panic(nLT0)
  2253  	}
  2254  	if len(d) < n {
  2255  		panic(badD)
  2256  	}
  2257  	if len(e) < n-1 {
  2258  		panic(badE)
  2259  	}
  2260  	if compz != lapack.None && compz != lapack.TridiagEV && compz != lapack.OriginalEV {
  2261  		panic(badEVComp)
  2262  	}
  2263  	if compz != lapack.None {
  2264  		if len(work) < max(1, 2*n-2) {
  2265  			panic(badWork)
  2266  		}
  2267  		checkMatrix(n, n, z, ldz)
  2268  	}
  2269  
  2270  	return lapacke.Dsteqr(lapack.Comp(compz), n, d, e, z, ldz, work)
  2271  }
  2272  
  2273  // Dsterf computes all eigenvalues of a symmetric tridiagonal matrix using the
  2274  // Pal-Walker-Kahan variant of the QL or QR algorithm.
  2275  //
  2276  // d contains the diagonal elements of the tridiagonal matrix on entry, and
  2277  // contains the eigenvalues in ascending order on exit. d must have length at
  2278  // least n, or Dsterf will panic.
  2279  //
  2280  // e contains the off-diagonal elements of the tridiagonal matrix on entry, and is
  2281  // overwritten during the call to Dsterf. e must have length of at least n-1 or
  2282  // Dsterf will panic.
  2283  //
  2284  // Dsterf is an internal routine. It is exported for testing purposes.
  2285  func (impl Implementation) Dsterf(n int, d, e []float64) (ok bool) {
  2286  	if n < 0 {
  2287  		panic(nLT0)
  2288  	}
  2289  	if n == 0 {
  2290  		return true
  2291  	}
  2292  	if len(d) < n {
  2293  		panic(badD)
  2294  	}
  2295  	if len(e) < n-1 {
  2296  		panic(badE)
  2297  	}
  2298  
  2299  	return lapacke.Dsterf(n, d, e)
  2300  }
  2301  
  2302  // Dsyev computes all eigenvalues and, optionally, the eigenvectors of a real
  2303  // symmetric matrix A.
  2304  //
  2305  // w contains the eigenvalues in ascending order upon return. w must have length
  2306  // at least n, and Dsyev will panic otherwise.
  2307  //
  2308  // On entry, a contains the elements of the symmetric matrix A in the triangular
  2309  // portion specified by uplo. If jobz == lapack.ComputeEV a contains the
  2310  // orthonormal eigenvectors of A on exit, otherwise on exit the specified
  2311  // triangular region is overwritten.
  2312  //
  2313  // work is temporary storage, and lwork specifies the usable memory length. At minimum,
  2314  // lwork >= 3*n-1, and Dsyev will panic otherwise. The amount of blocking is
  2315  // limited by the usable length. If lwork == -1, instead of computing Dsyev the
  2316  // optimal work length is stored into work[0].
  2317  func (impl Implementation) Dsyev(jobz lapack.EVJob, uplo blas.Uplo, n int, a []float64, lda int, w, work []float64, lwork int) (ok bool) {
  2318  	checkMatrix(n, n, a, lda)
  2319  	if lwork == -1 {
  2320  		work[0] = 3*float64(n) - 1
  2321  		return
  2322  	}
  2323  	if len(work) < lwork {
  2324  		panic(badWork)
  2325  	}
  2326  	if lwork < 3*n-1 {
  2327  		panic(badWork)
  2328  	}
  2329  	return lapacke.Dsyev(lapack.Job(jobz), uplo, n, a, lda, w, work, lwork)
  2330  }
  2331  
  2332  // Dsytrd reduces a symmetric n×n matrix A to symmetric tridiagonal form by an
  2333  // orthogonal similarity transformation
  2334  //  Q^T * A * Q = T
  2335  // where Q is an orthonormal matrix and T is symmetric and tridiagonal.
  2336  //
  2337  // On entry, a contains the elements of the input matrix in the triangle specified
  2338  // by uplo. On exit, the diagonal and sub/super-diagonal are overwritten by the
  2339  // corresponding elements of the tridiagonal matrix T. The remaining elements in
  2340  // the triangle, along with the array tau, contain the data to construct Q as
  2341  // the product of elementary reflectors.
  2342  //
  2343  // If uplo == blas.Upper, Q is constructed with
  2344  //  Q = H_{n-2} * ... * H_1 * H_0
  2345  // where
  2346  //  H_i = I - tau_i * v * v^T
  2347  // v is constructed as v[i+1:n] = 0, v[i] = 1, v[0:i-1] is stored in A[0:i-1, i+1].
  2348  // The elements of A are
  2349  //  [ d   e  v1  v2  v3]
  2350  //  [     d   e  v2  v3]
  2351  //  [         d   e  v3]
  2352  //  [             d   e]
  2353  //  [                 e]
  2354  //
  2355  // If uplo == blas.Lower, Q is constructed with
  2356  //  Q = H_0 * H_1 * ... * H_{n-2}
  2357  // where
  2358  //  H_i = I - tau_i * v * v^T
  2359  // v is constructed as v[0:i+1] = 0, v[i+1] = 1, v[i+2:n] is stored in A[i+2:n, i].
  2360  // The elements of A are
  2361  //  [ d                ]
  2362  //  [ e   d            ]
  2363  //  [v0   e   d        ]
  2364  //  [v0  v1   e   d    ]
  2365  //  [v0  v1  v2   e   d]
  2366  //
  2367  // d must have length n, and e and tau must have length n-1. Dsytrd will panic if
  2368  // these conditions are not met.
  2369  //
  2370  // work is temporary storage, and lwork specifies the usable memory length. At minimum,
  2371  // lwork >= 1, and Dsytrd will panic otherwise. The amount of blocking is
  2372  // limited by the usable length.
  2373  // If lwork == -1, instead of computing Dsytrd the optimal work length is stored
  2374  // into work[0].
  2375  //
  2376  // Dsytrd is an internal routine. It is exported for testing purposes.
  2377  func (impl Implementation) Dsytrd(uplo blas.Uplo, n int, a []float64, lda int, d, e, tau, work []float64, lwork int) {
  2378  	checkMatrix(n, n, a, lda)
  2379  	if len(d) < n {
  2380  		panic(badD)
  2381  	}
  2382  	if len(e) < n-1 {
  2383  		panic(badE)
  2384  	}
  2385  	if len(tau) < n-1 {
  2386  		panic(badTau)
  2387  	}
  2388  	if len(work) < lwork {
  2389  		panic(shortWork)
  2390  	}
  2391  	if lwork != -1 && lwork < 1 {
  2392  		panic(badWork)
  2393  	}
  2394  	if uplo != blas.Upper && uplo != blas.Lower {
  2395  		panic(badUplo)
  2396  	}
  2397  
  2398  	lapacke.Dsytrd(uplo, n, a, lda, d, e, tau, work, lwork)
  2399  }
  2400  
  2401  // Dtrcon estimates the reciprocal of the condition number of a triangular matrix A.
  2402  // The condition number computed may be based on the 1-norm or the ∞-norm.
  2403  //
  2404  // work is a temporary data slice of length at least 3*n and Dtrcon will panic otherwise.
  2405  //
  2406  // iwork is a temporary data slice of length at least n and Dtrcon will panic otherwise.
  2407  // Elements of iwork must fit within the int32 type or Dtrcon will panic.
  2408  func (impl Implementation) Dtrcon(norm lapack.MatrixNorm, uplo blas.Uplo, diag blas.Diag, n int, a []float64, lda int, work []float64, iwork []int) float64 {
  2409  	if norm != lapack.MaxColumnSum && norm != lapack.MaxRowSum {
  2410  		panic(badNorm)
  2411  	}
  2412  	if uplo != blas.Upper && uplo != blas.Lower {
  2413  		panic(badUplo)
  2414  	}
  2415  	if diag != blas.NonUnit && diag != blas.Unit {
  2416  		panic(badDiag)
  2417  	}
  2418  	if len(work) < 3*n {
  2419  		panic(badWork)
  2420  	}
  2421  	if len(iwork) < n {
  2422  		panic(badWork)
  2423  	}
  2424  	rcond := []float64{0}
  2425  	_iwork := make([]int32, len(iwork))
  2426  	for i, v := range iwork {
  2427  		if v != int(int32(v)) {
  2428  			panic("lapack: iwork element out of range")
  2429  		}
  2430  		_iwork[i] = int32(v)
  2431  	}
  2432  	lapacke.Dtrcon(byte(norm), uplo, diag, n, a, lda, rcond, work, _iwork)
  2433  	for i, v := range _iwork {
  2434  		iwork[i] = int(v)
  2435  	}
  2436  	return rcond[0]
  2437  }
  2438  
  2439  // Dtrexc reorders the real Schur factorization of a n×n real matrix
  2440  //  A = Q*T*Q^T
  2441  // so that the diagonal block of T with row index ifst is moved to row ilst.
  2442  //
  2443  // On entry, T must be in Schur canonical form, that is, block upper triangular
  2444  // with 1×1 and 2×2 diagonal blocks; each 2×2 diagonal block has its diagonal
  2445  // elements equal and its off-diagonal elements of opposite sign.
  2446  //
  2447  // On return, T will be reordered by an orthogonal similarity transformation Z
  2448  // as Z^T*T*Z, and will be again in Schur canonical form.
  2449  //
  2450  // If compq is lapack.UpdateSchur, on return the matrix Q of Schur vectors will be
  2451  // updated by postmultiplying it with Z.
  2452  // If compq is lapack.None, the matrix Q is not referenced and will not be
  2453  // updated.
  2454  // For other values of compq Dtrexc will panic.
  2455  //
  2456  // ifst and ilst specify the reordering of the diagonal blocks of T. The block
  2457  // with row index ifst is moved to row ilst, by a sequence of transpositions
  2458  // between adjacent blocks.
  2459  //
  2460  // If ifst points to the second row of a 2×2 block, ifstOut will point to the
  2461  // first row, otherwise it will be equal to ifst.
  2462  //
  2463  // ilstOut will point to the first row of the block in its final position. If ok
  2464  // is true, ilstOut may differ from ilst by +1 or -1.
  2465  //
  2466  // It must hold that
  2467  //  0 <= ifst < n, and  0 <= ilst < n,
  2468  // otherwise Dtrexc will panic.
  2469  //
  2470  // If ok is false, two adjacent blocks were too close to swap because the
  2471  // problem is very ill-conditioned. T may have been partially reordered, and
  2472  // ilstOut will point to the first row of the block at the position to which it
  2473  // has been moved.
  2474  //
  2475  // work must have length at least n, otherwise Dtrexc will panic.
  2476  //
  2477  // Dtrexc is an internal routine. It is exported for testing purposes.
  2478  func (impl Implementation) Dtrexc(compq lapack.EVComp, n int, t []float64, ldt int, q []float64, ldq int, ifst, ilst int, work []float64) (ifstOut, ilstOut int, ok bool) {
  2479  	checkMatrix(n, n, t, ldt)
  2480  	switch compq {
  2481  	default:
  2482  		panic("lapack: bad value of compq")
  2483  	case lapack.None:
  2484  		// q is not referenced but LAPACKE checks that ldq >= n always.
  2485  		q = nil
  2486  		ldq = max(1, n)
  2487  	case lapack.UpdateSchur:
  2488  		checkMatrix(n, n, q, ldq)
  2489  	}
  2490  	if (ifst < 0 || n <= ifst) && n > 0 {
  2491  		panic("lapack: ifst out of range")
  2492  	}
  2493  	if (ilst < 0 || n <= ilst) && n > 0 {
  2494  		panic("lapack: ilst out of range")
  2495  	}
  2496  	if len(work) < n {
  2497  		panic(badWork)
  2498  	}
  2499  
  2500  	// Quick return if possible.
  2501  	if n <= 1 {
  2502  		return ifst, ilst, true
  2503  	}
  2504  
  2505  	ifst32 := []int32{int32(ifst + 1)}
  2506  	ilst32 := []int32{int32(ilst + 1)}
  2507  	ok = lapacke.Dtrexc(lapack.Comp(compq), n, t, ldt, q, ldq, ifst32, ilst32, work)
  2508  	ifst = int(ifst32[0] - 1)
  2509  	ilst = int(ilst32[0] - 1)
  2510  	return ifst, ilst, ok
  2511  }
  2512  
  2513  // Dtrtri computes the inverse of a triangular matrix, storing the result in place
  2514  // into a. This is the BLAS level 3 version of the algorithm which builds upon
  2515  // Dtrti2 to operate on matrix blocks instead of only individual columns.
  2516  //
  2517  // Dtrtri returns whether the matrix a is singular.
  2518  // If the matrix is singular, the inversion is not performed.
  2519  func (impl Implementation) Dtrtri(uplo blas.Uplo, diag blas.Diag, n int, a []float64, lda int) (ok bool) {
  2520  	checkMatrix(n, n, a, lda)
  2521  	if uplo != blas.Upper && uplo != blas.Lower {
  2522  		panic(badUplo)
  2523  	}
  2524  	if diag != blas.NonUnit && diag != blas.Unit {
  2525  		panic(badDiag)
  2526  	}
  2527  	return lapacke.Dtrtri(uplo, diag, n, a, lda)
  2528  }
  2529  
  2530  // Dtrtrs solves a triangular system of the form A * X = B or A^T * X = B.
  2531  // Dtrtrs returns whether the solve completed successfully.
  2532  // If A is singular, no solve is performed.
  2533  func (impl Implementation) Dtrtrs(uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, n, nrhs int, a []float64, lda int, b []float64, ldb int) (ok bool) {
  2534  	return lapacke.Dtrtrs(uplo, trans, diag, n, nrhs, a, lda, b, ldb)
  2535  }
  2536  
  2537  // Dhseqr computes the eigenvalues of an n×n Hessenberg matrix H and,
  2538  // optionally, the matrices T and Z from the Schur decomposition
  2539  //  H = Z T Z^T,
  2540  // where T is an n×n upper quasi-triangular matrix (the Schur form), and Z is
  2541  // the n×n orthogonal matrix of Schur vectors.
  2542  //
  2543  // Optionally Z may be postmultiplied into an input orthogonal matrix Q so that
  2544  // this routine can give the Schur factorization of a matrix A which has been
  2545  // reduced to the Hessenberg form H by the orthogonal matrix Q:
  2546  //  A = Q H Q^T = (QZ) T (QZ)^T.
  2547  //
  2548  // If job == lapack.EigenvaluesOnly, only the eigenvalues will be computed.
  2549  // If job == lapack.EigenvaluesAndSchur, the eigenvalues and the Schur form T will
  2550  // be computed.
  2551  // For other values of job Dhseqr will panic.
  2552  //
  2553  // If compz == lapack.None, no Schur vectors will be computed and Z will not be
  2554  // referenced.
  2555  // If compz == lapack.HessEV, on return Z will contain the matrix of Schur
  2556  // vectors of H.
  2557  // If compz == lapack.OriginalEV, on entry z is assumed to contain the orthogonal
  2558  // matrix Q that is the identity except for the submatrix
  2559  // Q[ilo:ihi+1,ilo:ihi+1]. On return z will be updated to the product Q*Z.
  2560  //
  2561  // ilo and ihi determine the block of H on which Dhseqr operates. It is assumed
  2562  // that H is already upper triangular in rows and columns [0:ilo] and [ihi+1:n],
  2563  // although it will be only checked that the block is isolated, that is,
  2564  //  ilo == 0   or H[ilo,ilo-1] == 0,
  2565  //  ihi == n-1 or H[ihi+1,ihi] == 0,
  2566  // and Dhseqr will panic otherwise. ilo and ihi are typically set by a previous
  2567  // call to Dgebal, otherwise they should be set to 0 and n-1, respectively. It
  2568  // must hold that
  2569  //  0 <= ilo <= ihi < n,     if n > 0,
  2570  //  ilo == 0 and ihi == -1,  if n == 0.
  2571  //
  2572  // wr and wi must have length n.
  2573  //
  2574  // work must have length at least lwork and lwork must be at least max(1,n)
  2575  // otherwise Dhseqr will panic. The minimum lwork delivers very good and
  2576  // sometimes optimal performance, although lwork as large as 11*n may be
  2577  // required. On return, work[0] will contain the optimal value of lwork.
  2578  //
  2579  // If lwork is -1, instead of performing Dhseqr, the function only estimates the
  2580  // optimal workspace size and stores it into work[0]. Neither h nor z are
  2581  // accessed.
  2582  //
  2583  // unconverged indicates whether Dhseqr computed all the eigenvalues.
  2584  //
  2585  // If unconverged == 0, all the eigenvalues have been computed and their real
  2586  // and imaginary parts will be stored on return in wr and wi, respectively. If
  2587  // two eigenvalues are computed as a complex conjugate pair, they are stored in
  2588  // consecutive elements of wr and wi, say the i-th and (i+1)th, with wi[i] > 0
  2589  // and wi[i+1] < 0.
  2590  //
  2591  // If unconverged == 0 and job == lapack.EigenvaluesAndSchur, on return H will
  2592  // contain the upper quasi-triangular matrix T from the Schur decomposition (the
  2593  // Schur form). 2×2 diagonal blocks (corresponding to complex conjugate pairs of
  2594  // eigenvalues) will be returned in standard form, with
  2595  //  H[i,i] == H[i+1,i+1],
  2596  // and
  2597  //  H[i+1,i]*H[i,i+1] < 0.
  2598  // The eigenvalues will be stored in wr and wi in the same order as on the
  2599  // diagonal of the Schur form returned in H, with
  2600  //  wr[i] = H[i,i],
  2601  // and, if H[i:i+2,i:i+2] is a 2×2 diagonal block,
  2602  //  wi[i]   = sqrt(-H[i+1,i]*H[i,i+1]),
  2603  //  wi[i+1] = -wi[i].
  2604  //
  2605  // If unconverged == 0 and job == lapack.EigenvaluesOnly, the contents of h
  2606  // on return is unspecified.
  2607  //
  2608  // If unconverged > 0, some eigenvalues have not converged, and the blocks
  2609  // [0:ilo] and [unconverged:n] of wr and wi will contain those eigenvalues which
  2610  // have been successfully computed. Failures are rare.
  2611  //
  2612  // If unconverged > 0 and job == lapack.EigenvaluesOnly, on return the
  2613  // remaining unconverged eigenvalues are the eigenvalues of the upper Hessenberg
  2614  // matrix H[ilo:unconverged,ilo:unconverged].
  2615  //
  2616  // If unconverged > 0 and job == lapack.EigenvaluesAndSchur, then on
  2617  // return
  2618  //  (initial H) U = U (final H),   (*)
  2619  // where U is an orthogonal matrix. The final H is upper Hessenberg and
  2620  // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
  2621  //
  2622  // If unconverged > 0 and compz == lapack.OriginalEV, then on return
  2623  //  (final Z) = (initial Z) U,
  2624  // where U is the orthogonal matrix in (*) regardless of the value of job.
  2625  //
  2626  // If unconverged > 0 and compz == lapack.InitZ, then on return
  2627  //  (final Z) = U,
  2628  // where U is the orthogonal matrix in (*) regardless of the value of job.
  2629  //
  2630  // References:
  2631  //  [1] R. Byers. LAPACK 3.1 xHSEQR: Tuning and Implementation Notes on the
  2632  //      Small Bulge Multi-Shift QR Algorithm with Aggressive Early Deflation.
  2633  //      LAPACK Working Note 187 (2007)
  2634  //      URL: http://www.netlib.org/lapack/lawnspdf/lawn187.pdf
  2635  //  [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
  2636  //      Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
  2637  //      Anal. Appl. 23(4) (2002), pp. 929—947
  2638  //      URL: http://dx.doi.org/10.1137/S0895479801384573
  2639  //  [3] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
  2640  //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
  2641  //      URL: http://dx.doi.org/10.1137/S0895479801384585
  2642  //
  2643  // Dhseqr is an internal routine. It is exported for testing purposes.
  2644  func (impl Implementation) Dhseqr(job lapack.EVJob, compz lapack.EVComp, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, z []float64, ldz int, work []float64, lwork int) (unconverged int) {
  2645  	switch job {
  2646  	default:
  2647  		panic(badEVJob)
  2648  	case lapack.EigenvaluesOnly, lapack.EigenvaluesAndSchur:
  2649  	}
  2650  	var wantz bool
  2651  	switch compz {
  2652  	default:
  2653  		panic(badEVComp)
  2654  	case lapack.None:
  2655  	case lapack.HessEV, lapack.OriginalEV:
  2656  		wantz = true
  2657  	}
  2658  	switch {
  2659  	case n < 0:
  2660  		panic(nLT0)
  2661  	case ilo < 0 || max(0, n-1) < ilo:
  2662  		panic(badIlo)
  2663  	case ihi < min(ilo, n-1) || n <= ihi:
  2664  		panic(badIhi)
  2665  	case len(work) < lwork:
  2666  		panic(shortWork)
  2667  	case lwork < max(1, n) && lwork != -1:
  2668  		panic(badWork)
  2669  	}
  2670  	if lwork != -1 {
  2671  		checkMatrix(n, n, h, ldh)
  2672  		switch {
  2673  		case wantz:
  2674  			checkMatrix(n, n, z, ldz)
  2675  		case len(wr) < n:
  2676  			panic("lapack: wr has insufficient length")
  2677  		case len(wi) < n:
  2678  			panic("lapack: wi has insufficient length")
  2679  		}
  2680  	}
  2681  
  2682  	return lapacke.Dhseqr(lapack.Job(job), lapack.Comp(compz), n, ilo+1, ihi+1,
  2683  		h, ldh, wr, wi, z, ldz, work, lwork)
  2684  }
  2685  
  2686  // Dgeev computes the eigenvalues and, optionally, the left and/or right
  2687  // eigenvectors for an n×n real nonsymmetric matrix A.
  2688  //
  2689  // The right eigenvector v_j of A corresponding to an eigenvalue λ_j
  2690  // is defined by
  2691  //  A v_j = λ_j v_j,
  2692  // and the left eigenvector u_j corresponding to an eigenvalue λ_j is defined by
  2693  //  u_j^H A = λ_j u_j^H,
  2694  // where u_j^H is the conjugate transpose of u_j.
  2695  //
  2696  // On return, A will be overwritten and the left and right eigenvectors will be
  2697  // stored, respectively, in the columns of the n×n matrices VL and VR in the
  2698  // same order as their eigenvalues. If the j-th eigenvalue is real, then
  2699  //  u_j = VL[:,j],
  2700  //  v_j = VR[:,j],
  2701  // and if it is not real, then j and j+1 form a complex conjugate pair and the
  2702  // eigenvectors can be recovered as
  2703  //  u_j     = VL[:,j] + i*VL[:,j+1],
  2704  //  u_{j+1} = VL[:,j] - i*VL[:,j+1],
  2705  //  v_j     = VR[:,j] + i*VR[:,j+1],
  2706  //  v_{j+1} = VR[:,j] - i*VR[:,j+1].
  2707  // where i is the imaginary unit. The computed eigenvectors are normalized to
  2708  // have Euclidean norm equal to 1 and largest component real.
  2709  //
  2710  // Left eigenvectors will be computed only if jobvl == lapack.ComputeLeftEV,
  2711  // otherwise jobvl must be lapack.None. Right eigenvectors will be computed
  2712  // only if jobvr == lapack.ComputeRightEV, otherwise jobvr must be lapack.None.
  2713  // For other values of jobvl and jobvr Dgeev will panic.
  2714  //
  2715  // wr and wi contain the real and imaginary parts, respectively, of the computed
  2716  // eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with
  2717  // the eigenvalue having the positive imaginary part first.
  2718  // wr and wi must have length n, and Dgeev will panic otherwise.
  2719  //
  2720  // work must have length at least lwork and lwork must be at least max(1,4*n) if
  2721  // the left or right eigenvectors are computed, and at least max(1,3*n) if no
  2722  // eigenvectors are computed. For good performance, lwork must generally be
  2723  // larger.  On return, optimal value of lwork will be stored in work[0].
  2724  //
  2725  // If lwork == -1, instead of performing Dgeev, the function only calculates the
  2726  // optimal vaule of lwork and stores it into work[0].
  2727  //
  2728  // On return, first is the index of the first valid eigenvalue. If first == 0,
  2729  // all eigenvalues and eigenvectors have been computed. If first is positive,
  2730  // Dgeev failed to compute all the eigenvalues, no eigenvectors have been
  2731  // computed and wr[first:] and wi[first:] contain those eigenvalues which have
  2732  // converged.
  2733  func (impl Implementation) Dgeev(jobvl lapack.LeftEVJob, jobvr lapack.RightEVJob, n int, a []float64, lda int, wr, wi []float64, vl []float64, ldvl int, vr []float64, ldvr int, work []float64, lwork int) (first int) {
  2734  	var wantvl bool
  2735  	switch jobvl {
  2736  	default:
  2737  		panic("lapack: invalid LeftEVJob")
  2738  	case lapack.ComputeLeftEV:
  2739  		wantvl = true
  2740  	case lapack.None:
  2741  		wantvl = false
  2742  	}
  2743  	var wantvr bool
  2744  	switch jobvr {
  2745  	default:
  2746  		panic("lapack: invalid RightEVJob")
  2747  	case lapack.ComputeRightEV:
  2748  		wantvr = true
  2749  	case lapack.None:
  2750  		wantvr = false
  2751  	}
  2752  	switch {
  2753  	case n < 0:
  2754  		panic(nLT0)
  2755  	case len(work) < lwork:
  2756  		panic(shortWork)
  2757  	}
  2758  	var minwrk int
  2759  	if wantvl || wantvr {
  2760  		minwrk = max(1, 4*n)
  2761  	} else {
  2762  		minwrk = max(1, 3*n)
  2763  	}
  2764  	if lwork != -1 {
  2765  		checkMatrix(n, n, a, lda)
  2766  		if wantvl {
  2767  			checkMatrix(n, n, vl, ldvl)
  2768  		}
  2769  		if wantvr {
  2770  			checkMatrix(n, n, vr, ldvr)
  2771  		}
  2772  		switch {
  2773  		case len(wr) != n:
  2774  			panic("lapack: bad length of wr")
  2775  		case len(wi) != n:
  2776  			panic("lapack: bad length of wi")
  2777  		case lwork < minwrk:
  2778  			panic(badWork)
  2779  		}
  2780  	}
  2781  
  2782  	// Quick return if possible.
  2783  	if n == 0 {
  2784  		work[0] = 1
  2785  		return 0
  2786  	}
  2787  
  2788  	first = lapacke.Dgeev(lapack.Job(jobvl), lapack.Job(jobvr), n, a, max(n, lda), wr, wi,
  2789  		vl, max(n, ldvl), vr, max(n, ldvr), work, lwork)
  2790  	if lwork == -1 && int(work[0]) < minwrk {
  2791  		work[0] = float64(minwrk)
  2792  	}
  2793  	return first
  2794  }
  2795  
  2796  // Dtgsja computes the generalized singular value decomposition (GSVD)
  2797  // of two real upper triangular or trapezoidal matrices A and B.
  2798  //
  2799  // A and B have the following forms, which may be obtained by the
  2800  // preprocessing subroutine Dggsvp from a general m×n matrix A and p×n
  2801  // matrix B:
  2802  //
  2803  //            n-k-l  k    l
  2804  //  A =    k [  0   A12  A13 ] if m-k-l >= 0;
  2805  //         l [  0    0   A23 ]
  2806  //     m-k-l [  0    0    0  ]
  2807  //
  2808  //            n-k-l  k    l
  2809  //  A =    k [  0   A12  A13 ] if m-k-l < 0;
  2810  //       m-k [  0    0   A23 ]
  2811  //
  2812  //            n-k-l  k    l
  2813  //  B =    l [  0    0   B13 ]
  2814  //       p-l [  0    0    0  ]
  2815  //
  2816  // where the k×k matrix A12 and l×l matrix B13 are non-singular
  2817  // upper triangular. A23 is l×l upper triangular if m-k-l >= 0,
  2818  // otherwise A23 is (m-k)×l upper trapezoidal.
  2819  //
  2820  // On exit,
  2821  //
  2822  //  U^T*A*Q = D1*[ 0 R ], V^T*B*Q = D2*[ 0 R ],
  2823  //
  2824  // where U, V and Q are orthogonal matrices.
  2825  // R is a non-singular upper triangular matrix, and D1 and D2 are
  2826  // diagonal matrices, which are of the following structures:
  2827  //
  2828  // If m-k-l >= 0,
  2829  //
  2830  //                    k  l
  2831  //       D1 =     k [ I  0 ]
  2832  //                l [ 0  C ]
  2833  //            m-k-l [ 0  0 ]
  2834  //
  2835  //                  k  l
  2836  //       D2 = l   [ 0  S ]
  2837  //            p-l [ 0  0 ]
  2838  //
  2839  //               n-k-l  k    l
  2840  //  [ 0 R ] = k [  0   R11  R12 ] k
  2841  //            l [  0    0   R22 ] l
  2842  //
  2843  // where
  2844  //
  2845  //  C = diag( alpha_k, ... , alpha_{k+l} ),
  2846  //  S = diag( beta_k,  ... , beta_{k+l} ),
  2847  //  C^2 + S^2 = I.
  2848  //
  2849  // R is stored in
  2850  //  A[0:k+l, n-k-l:n]
  2851  // on exit.
  2852  //
  2853  // If m-k-l < 0,
  2854  //
  2855  //                 k m-k k+l-m
  2856  //      D1 =   k [ I  0    0  ]
  2857  //           m-k [ 0  C    0  ]
  2858  //
  2859  //                   k m-k k+l-m
  2860  //      D2 =   m-k [ 0  S    0  ]
  2861  //           k+l-m [ 0  0    I  ]
  2862  //             p-l [ 0  0    0  ]
  2863  //
  2864  //                 n-k-l  k   m-k  k+l-m
  2865  //  [ 0 R ] =    k [ 0    R11  R12  R13 ]
  2866  //             m-k [ 0     0   R22  R23 ]
  2867  //           k+l-m [ 0     0    0   R33 ]
  2868  //
  2869  // where
  2870  //  C = diag( alpha_k, ... , alpha_m ),
  2871  //  S = diag( beta_k,  ... , beta_m ),
  2872  //  C^2 + S^2 = I.
  2873  //
  2874  //  R = [ R11 R12 R13 ] is stored in A[1:m, n-k-l+1:n]
  2875  //      [  0  R22 R23 ]
  2876  // and R33 is stored in
  2877  //  B[m-k:l, n+m-k-l:n] on exit.
  2878  //
  2879  // The computation of the orthogonal transformation matrices U, V or Q
  2880  // is optional. These matrices may either be formed explicitly, or they
  2881  // may be post-multiplied into input matrices U1, V1, or Q1.
  2882  //
  2883  // Dtgsja essentially uses a variant of Kogbetliantz algorithm to reduce
  2884  // min(l,m-k)×l triangular or trapezoidal matrix A23 and l×l
  2885  // matrix B13 to the form:
  2886  //
  2887  //  U1^T*A13*Q1 = C1*R1; V1^T*B13*Q1 = S1*R1,
  2888  //
  2889  // where U1, V1 and Q1 are orthogonal matrices. C1 and S1 are diagonal
  2890  // matrices satisfying
  2891  //
  2892  //  C1^2 + S1^2 = I,
  2893  //
  2894  // and R1 is an l×l non-singular upper triangular matrix.
  2895  //
  2896  // jobU, jobV and jobQ are options for computing the orthogonal matrices. The behavior
  2897  // is as follows
  2898  //  jobU == lapack.GSVDU        Compute orthogonal matrix U
  2899  //  jobU == lapack.GSVDUnit     Use unit-initialized matrix
  2900  //  jobU == lapack.GSVDNone     Do not compute orthogonal matrix.
  2901  // The behavior is the same for jobV and jobQ with the exception that instead of
  2902  // lapack.GSVDU these accept lapack.GSVDV and lapack.GSVDQ respectively.
  2903  // The matrices U, V and Q must be m×m, p×p and n×n respectively unless the
  2904  // relevant job parameter is lapack.GSVDNone.
  2905  //
  2906  // k and l specify the sub-blocks in the input matrices A and B:
  2907  //  A23 = A[k:min(k+l,m), n-l:n) and B13 = B[0:l, n-l:n]
  2908  // of A and B, whose GSVD is going to be computed by Dtgsja.
  2909  //
  2910  // tola and tolb are the convergence criteria for the Jacobi-Kogbetliantz
  2911  // iteration procedure. Generally, they are the same as used in the preprocessing
  2912  // step, for example,
  2913  //  tola = max(m, n)*norm(A)*eps,
  2914  //  tolb = max(p, n)*norm(B)*eps,
  2915  // where eps is the machine epsilon.
  2916  //
  2917  // work must have length at least 2*n, otherwise Dtgsja will panic.
  2918  //
  2919  // alpha and beta must have length n or Dtgsja will panic. On exit, alpha and
  2920  // beta contain the generalized singular value pairs of A and B
  2921  //   alpha[0:k] = 1,
  2922  //   beta[0:k]  = 0,
  2923  // if m-k-l >= 0,
  2924  //   alpha[k:k+l] = diag(C),
  2925  //   beta[k:k+l]  = diag(S),
  2926  // if m-k-l < 0,
  2927  //   alpha[k:m]= C, alpha[m:k+l]= 0
  2928  //   beta[k:m] = S, beta[m:k+l] = 1.
  2929  // if k+l < n,
  2930  //   alpha[k+l:n] = 0 and
  2931  //   beta[k+l:n]  = 0.
  2932  //
  2933  // On exit, A[n-k:n, 0:min(k+l,m)] contains the triangular matrix R or part of R
  2934  // and if necessary, B[m-k:l, n+m-k-l:n] contains a part of R.
  2935  //
  2936  // Dtgsja returns whether the routine converged and the number of iteration cycles
  2937  // that were run.
  2938  //
  2939  // Dtgsja is an internal routine. It is exported for testing purposes.
  2940  func (impl Implementation) Dtgsja(jobU, jobV, jobQ lapack.GSVDJob, m, p, n, k, l int, a []float64, lda int, b []float64, ldb int, tola, tolb float64, alpha, beta, u []float64, ldu int, v []float64, ldv int, q []float64, ldq int, work []float64) (cycles int, ok bool) {
  2941  	checkMatrix(m, n, a, lda)
  2942  	checkMatrix(p, n, b, ldb)
  2943  
  2944  	if len(alpha) != n {
  2945  		panic(badAlpha)
  2946  	}
  2947  	if len(beta) != n {
  2948  		panic(badBeta)
  2949  	}
  2950  
  2951  	initu := jobU == lapack.GSVDUnit
  2952  	wantu := initu || jobU == lapack.GSVDU
  2953  	if !initu && !wantu && jobU != lapack.GSVDNone {
  2954  		panic(badGSVDJob + "U")
  2955  	}
  2956  	if jobU != lapack.GSVDNone {
  2957  		checkMatrix(m, m, u, ldu)
  2958  	}
  2959  
  2960  	initv := jobV == lapack.GSVDUnit
  2961  	wantv := initv || jobV == lapack.GSVDV
  2962  	if !initv && !wantv && jobV != lapack.GSVDNone {
  2963  		panic(badGSVDJob + "V")
  2964  	}
  2965  	if jobV != lapack.GSVDNone {
  2966  		checkMatrix(p, p, v, ldv)
  2967  	}
  2968  
  2969  	initq := jobQ == lapack.GSVDUnit
  2970  	wantq := initq || jobQ == lapack.GSVDQ
  2971  	if !initq && !wantq && jobQ != lapack.GSVDNone {
  2972  		panic(badGSVDJob + "Q")
  2973  	}
  2974  	if jobQ != lapack.GSVDNone {
  2975  		checkMatrix(n, n, q, ldq)
  2976  	}
  2977  
  2978  	if len(work) < 2*n {
  2979  		panic(badWork)
  2980  	}
  2981  
  2982  	ncycle := []int32{0}
  2983  	ok = lapacke.Dtgsja(lapack.Job(jobU), lapack.Job(jobV), lapack.Job(jobQ), m, p, n, k, l, a, lda, b, ldb, tola, tolb, alpha, beta, u, ldu, v, ldv, q, ldq, work, ncycle)
  2984  	return int(ncycle[0]), ok
  2985  }