github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dgesvd.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/jingcheng-WU/gonum/blas"
    11  	"github.com/jingcheng-WU/gonum/blas/blas64"
    12  	"github.com/jingcheng-WU/gonum/lapack"
    13  )
    14  
    15  const noSVDO = "dgesvd: not coded for overwrite"
    16  
    17  // Dgesvd computes the singular value decomposition of the input matrix A.
    18  //
    19  // The singular value decomposition is
    20  //  A = U * Sigma * Vᵀ
    21  // where Sigma is an m×n diagonal matrix containing the singular values of A,
    22  // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
    23  // min(m,n) columns of U and V are the left and right singular vectors of A
    24  // respectively.
    25  //
    26  // jobU and jobVT are options for computing the singular vectors. The behavior
    27  // is as follows
    28  //  jobU == lapack.SVDAll       All m columns of U are returned in u
    29  //  jobU == lapack.SVDStore     The first min(m,n) columns are returned in u
    30  //  jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
    31  //  jobU == lapack.SVDNone      The columns of U are not computed.
    32  // The behavior is the same for jobVT and the rows of Vᵀ. At most one of jobU
    33  // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
    34  //
    35  // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
    36  // the data is overwritten. On exit, A contains the appropriate singular vectors
    37  // if either job is lapack.SVDOverwrite.
    38  //
    39  // s is a slice of length at least min(m,n) and on exit contains the singular
    40  // values in decreasing order.
    41  //
    42  // u contains the left singular vectors on exit, stored column-wise. If
    43  // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDStore u is
    44  // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
    45  // not used.
    46  //
    47  // vt contains the left singular vectors on exit, stored row-wise. If
    48  // jobV == lapack.SVDAll, vt is of size n×n. If jobVT == lapack.SVDStore vt is
    49  // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
    50  // not used.
    51  //
    52  // work is a slice for storing temporary memory, and lwork is the usable size of
    53  // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
    54  // If lwork == -1, instead of performing Dgesvd, the optimal work length will be
    55  // stored into work[0]. Dgesvd will panic if the working memory has insufficient
    56  // storage.
    57  //
    58  // Dgesvd returns whether the decomposition successfully completed.
    59  func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
    60  	if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
    61  		panic(noSVDO)
    62  	}
    63  
    64  	wantua := jobU == lapack.SVDAll
    65  	wantus := jobU == lapack.SVDStore
    66  	wantuas := wantua || wantus
    67  	wantuo := jobU == lapack.SVDOverwrite
    68  	wantun := jobU == lapack.SVDNone
    69  	if !(wantua || wantus || wantuo || wantun) {
    70  		panic(badSVDJob)
    71  	}
    72  
    73  	wantva := jobVT == lapack.SVDAll
    74  	wantvs := jobVT == lapack.SVDStore
    75  	wantvas := wantva || wantvs
    76  	wantvo := jobVT == lapack.SVDOverwrite
    77  	wantvn := jobVT == lapack.SVDNone
    78  	if !(wantva || wantvs || wantvo || wantvn) {
    79  		panic(badSVDJob)
    80  	}
    81  
    82  	if wantuo && wantvo {
    83  		panic(bothSVDOver)
    84  	}
    85  
    86  	minmn := min(m, n)
    87  	minwork := 1
    88  	if minmn > 0 {
    89  		minwork = max(3*minmn+max(m, n), 5*minmn)
    90  	}
    91  	switch {
    92  	case m < 0:
    93  		panic(mLT0)
    94  	case n < 0:
    95  		panic(nLT0)
    96  	case lda < max(1, n):
    97  		panic(badLdA)
    98  	case ldu < 1, wantua && ldu < m, wantus && ldu < minmn:
    99  		panic(badLdU)
   100  	case ldvt < 1 || (wantvas && ldvt < n):
   101  		panic(badLdVT)
   102  	case lwork < minwork && lwork != -1:
   103  		panic(badLWork)
   104  	case len(work) < max(1, lwork):
   105  		panic(shortWork)
   106  	}
   107  
   108  	// Quick return if possible.
   109  	if minmn == 0 {
   110  		work[0] = 1
   111  		return true
   112  	}
   113  
   114  	// Compute optimal workspace size for subroutines.
   115  	opts := string(jobU) + string(jobVT)
   116  	mnthr := impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
   117  	maxwrk := 1
   118  	var wrkbl, bdspac int
   119  	if m >= n {
   120  		bdspac = 5 * n
   121  		impl.Dgeqrf(m, n, a, lda, nil, work, -1)
   122  		lwork_dgeqrf := int(work[0])
   123  
   124  		impl.Dorgqr(m, n, n, a, lda, nil, work, -1)
   125  		lwork_dorgqr_n := int(work[0])
   126  		impl.Dorgqr(m, m, n, a, lda, nil, work, -1)
   127  		lwork_dorgqr_m := int(work[0])
   128  
   129  		impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1)
   130  		lwork_dgebrd := int(work[0])
   131  
   132  		impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, nil, work, -1)
   133  		lwork_dorgbr_p := int(work[0])
   134  
   135  		impl.Dorgbr(lapack.GenerateQ, n, n, n, a, lda, nil, work, -1)
   136  		lwork_dorgbr_q := int(work[0])
   137  
   138  		if m >= mnthr {
   139  			if wantun {
   140  				// Path 1 (m much larger than n, jobU == None)
   141  				maxwrk = n + lwork_dgeqrf
   142  				maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
   143  				if wantvo || wantvas {
   144  					maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
   145  				}
   146  				maxwrk = max(maxwrk, bdspac)
   147  			} else if wantuo && wantvn {
   148  				// Path 2 (m much larger than n, jobU == Overwrite, jobVT == None)
   149  				wrkbl = n + lwork_dgeqrf
   150  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   151  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   152  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   153  				wrkbl = max(wrkbl, bdspac)
   154  				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
   155  			} else if wantuo && wantvas {
   156  				// Path 3 (m much larger than n, jobU == Overwrite, jobVT == Store or All)
   157  				wrkbl = n + lwork_dgeqrf
   158  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   159  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   160  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   161  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   162  				wrkbl = max(wrkbl, bdspac)
   163  				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
   164  			} else if wantus && wantvn {
   165  				// Path 4 (m much larger than n, jobU == Store, jobVT == None)
   166  				wrkbl = n + lwork_dgeqrf
   167  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   168  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   169  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   170  				wrkbl = max(wrkbl, bdspac)
   171  				maxwrk = n*n + wrkbl
   172  			} else if wantus && wantvo {
   173  				// Path 5 (m much larger than n, jobU == Store, jobVT == Overwrite)
   174  				wrkbl = n + lwork_dgeqrf
   175  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   176  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   177  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   178  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   179  				wrkbl = max(wrkbl, bdspac)
   180  				maxwrk = 2*n*n + wrkbl
   181  			} else if wantus && wantvas {
   182  				// Path 6 (m much larger than n, jobU == Store, jobVT == Store or All)
   183  				wrkbl = n + lwork_dgeqrf
   184  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   185  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   186  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   187  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   188  				wrkbl = max(wrkbl, bdspac)
   189  				maxwrk = n*n + wrkbl
   190  			} else if wantua && wantvn {
   191  				// Path 7 (m much larger than n, jobU == All, jobVT == None)
   192  				wrkbl = n + lwork_dgeqrf
   193  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   194  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   195  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   196  				wrkbl = max(wrkbl, bdspac)
   197  				maxwrk = n*n + wrkbl
   198  			} else if wantua && wantvo {
   199  				// Path 8 (m much larger than n, jobU == All, jobVT == Overwrite)
   200  				wrkbl = n + lwork_dgeqrf
   201  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   202  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   203  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   204  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   205  				wrkbl = max(wrkbl, bdspac)
   206  				maxwrk = 2*n*n + wrkbl
   207  			} else if wantua && wantvas {
   208  				// Path 9 (m much larger than n, jobU == All, jobVT == Store or All)
   209  				wrkbl = n + lwork_dgeqrf
   210  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   211  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   212  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   213  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   214  				wrkbl = max(wrkbl, bdspac)
   215  				maxwrk = n*n + wrkbl
   216  			}
   217  		} else {
   218  			// Path 10 (m at least n, but not much larger)
   219  			impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
   220  			lwork_dgebrd := int(work[0])
   221  			maxwrk = 3*n + lwork_dgebrd
   222  			if wantus || wantuo {
   223  				impl.Dorgbr(lapack.GenerateQ, m, n, n, a, lda, nil, work, -1)
   224  				lwork_dorgbr_q = int(work[0])
   225  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
   226  			}
   227  			if wantua {
   228  				impl.Dorgbr(lapack.GenerateQ, m, m, n, a, lda, nil, work, -1)
   229  				lwork_dorgbr_q := int(work[0])
   230  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
   231  			}
   232  			if !wantvn {
   233  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
   234  			}
   235  			maxwrk = max(maxwrk, bdspac)
   236  		}
   237  	} else {
   238  		bdspac = 5 * m
   239  
   240  		impl.Dgelqf(m, n, a, lda, nil, work, -1)
   241  		lwork_dgelqf := int(work[0])
   242  
   243  		impl.Dorglq(n, n, m, nil, n, nil, work, -1)
   244  		lwork_dorglq_n := int(work[0])
   245  		impl.Dorglq(m, n, m, a, lda, nil, work, -1)
   246  		lwork_dorglq_m := int(work[0])
   247  
   248  		impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1)
   249  		lwork_dgebrd := int(work[0])
   250  
   251  		impl.Dorgbr(lapack.GeneratePT, m, m, m, a, n, nil, work, -1)
   252  		lwork_dorgbr_p := int(work[0])
   253  
   254  		impl.Dorgbr(lapack.GenerateQ, m, m, m, a, n, nil, work, -1)
   255  		lwork_dorgbr_q := int(work[0])
   256  
   257  		if n >= mnthr {
   258  			if wantvn {
   259  				// Path 1t (n much larger than m, jobVT == None)
   260  				maxwrk = m + lwork_dgelqf
   261  				maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
   262  				if wantuo || wantuas {
   263  					maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
   264  				}
   265  				maxwrk = max(maxwrk, bdspac)
   266  			} else if wantvo && wantun {
   267  				// Path 2t (n much larger than m, jobU == None, jobVT == Overwrite)
   268  				wrkbl = m + lwork_dgelqf
   269  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   270  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   271  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   272  				wrkbl = max(wrkbl, bdspac)
   273  				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
   274  			} else if wantvo && wantuas {
   275  				// Path 3t (n much larger than m, jobU == Store or All, jobVT == Overwrite)
   276  				wrkbl = m + lwork_dgelqf
   277  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   278  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   279  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   280  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   281  				wrkbl = max(wrkbl, bdspac)
   282  				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
   283  			} else if wantvs && wantun {
   284  				// Path 4t (n much larger than m, jobU == None, jobVT == Store)
   285  				wrkbl = m + lwork_dgelqf
   286  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   287  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   288  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   289  				wrkbl = max(wrkbl, bdspac)
   290  				maxwrk = m*m + wrkbl
   291  			} else if wantvs && wantuo {
   292  				// Path 5t (n much larger than m, jobU == Overwrite, jobVT == Store)
   293  				wrkbl = m + lwork_dgelqf
   294  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   295  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   296  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   297  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   298  				wrkbl = max(wrkbl, bdspac)
   299  				maxwrk = 2*m*m + wrkbl
   300  			} else if wantvs && wantuas {
   301  				// Path 6t (n much larger than m, jobU == Store or All, jobVT == Store)
   302  				wrkbl = m + lwork_dgelqf
   303  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   304  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   305  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   306  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   307  				wrkbl = max(wrkbl, bdspac)
   308  				maxwrk = m*m + wrkbl
   309  			} else if wantva && wantun {
   310  				// Path 7t (n much larger than m, jobU== None, jobVT == All)
   311  				wrkbl = m + lwork_dgelqf
   312  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   313  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   314  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   315  				wrkbl = max(wrkbl, bdspac)
   316  				maxwrk = m*m + wrkbl
   317  			} else if wantva && wantuo {
   318  				// Path 8t (n much larger than m, jobU == Overwrite, jobVT == All)
   319  				wrkbl = m + lwork_dgelqf
   320  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   321  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   322  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   323  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   324  				wrkbl = max(wrkbl, bdspac)
   325  				maxwrk = 2*m*m + wrkbl
   326  			} else if wantva && wantuas {
   327  				// Path 9t (n much larger than m, jobU == Store or All, jobVT == All)
   328  				wrkbl = m + lwork_dgelqf
   329  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   330  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   331  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   332  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   333  				wrkbl = max(wrkbl, bdspac)
   334  				maxwrk = m*m + wrkbl
   335  			}
   336  		} else {
   337  			// Path 10t (n greater than m, but not much larger)
   338  			impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
   339  			lwork_dgebrd = int(work[0])
   340  			maxwrk = 3*m + lwork_dgebrd
   341  			if wantvs || wantvo {
   342  				impl.Dorgbr(lapack.GeneratePT, m, n, m, a, n, nil, work, -1)
   343  				lwork_dorgbr_p = int(work[0])
   344  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
   345  			}
   346  			if wantva {
   347  				impl.Dorgbr(lapack.GeneratePT, n, n, m, a, n, nil, work, -1)
   348  				lwork_dorgbr_p = int(work[0])
   349  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
   350  			}
   351  			if !wantun {
   352  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
   353  			}
   354  			maxwrk = max(maxwrk, bdspac)
   355  		}
   356  	}
   357  
   358  	maxwrk = max(maxwrk, minwork)
   359  	if lwork == -1 {
   360  		work[0] = float64(maxwrk)
   361  		return true
   362  	}
   363  
   364  	if len(a) < (m-1)*lda+n {
   365  		panic(shortA)
   366  	}
   367  	if len(s) < minmn {
   368  		panic(shortS)
   369  	}
   370  	if (len(u) < (m-1)*ldu+m && wantua) || (len(u) < (m-1)*ldu+minmn && wantus) {
   371  		panic(shortU)
   372  	}
   373  	if (len(vt) < (n-1)*ldvt+n && wantva) || (len(vt) < (minmn-1)*ldvt+n && wantvs) {
   374  		panic(shortVT)
   375  	}
   376  
   377  	// Perform decomposition.
   378  	eps := dlamchE
   379  	smlnum := math.Sqrt(dlamchS) / eps
   380  	bignum := 1 / smlnum
   381  
   382  	// Scale A if max element outside range [smlnum, bignum].
   383  	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
   384  	var iscl bool
   385  	if anrm > 0 && anrm < smlnum {
   386  		iscl = true
   387  		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
   388  	} else if anrm > bignum {
   389  		iscl = true
   390  		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
   391  	}
   392  
   393  	bi := blas64.Implementation()
   394  	var ie int
   395  	if m >= n {
   396  		// If A has sufficiently more rows than columns, use the QR decomposition.
   397  		if m >= mnthr {
   398  			// m >> n
   399  			if wantun {
   400  				// Path 1.
   401  				itau := 0
   402  				iwork := itau + n
   403  
   404  				// Compute A = Q * R.
   405  				impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   406  
   407  				// Zero out below R.
   408  				impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   409  				ie = 0
   410  				itauq := ie + n
   411  				itaup := itauq + n
   412  				iwork = itaup + n
   413  				// Bidiagonalize R in A.
   414  				impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:],
   415  					work[itaup:], work[iwork:], lwork-iwork)
   416  				ncvt := 0
   417  				if wantvo || wantvas {
   418  					impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, work[itaup:],
   419  						work[iwork:], lwork-iwork)
   420  					ncvt = n
   421  				}
   422  				iwork = ie + n
   423  
   424  				// Perform bidiagonal QR iteration computing right singular vectors
   425  				// of A in A if desired.
   426  				ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:],
   427  					a, lda, work, 1, work, 1, work[iwork:])
   428  
   429  				// If right singular vectors desired in VT, copy them there.
   430  				if wantvas {
   431  					impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
   432  				}
   433  			} else if wantuo && wantvn {
   434  				// Path 2
   435  				panic(noSVDO)
   436  			} else if wantuo && wantvas {
   437  				// Path 3
   438  				panic(noSVDO)
   439  			} else if wantus {
   440  				if wantvn {
   441  					// Path 4
   442  					if lwork >= n*n+max(4*n, bdspac) {
   443  						// Sufficient workspace for a fast algorithm.
   444  						ir := 0
   445  						var ldworkr int
   446  						if lwork >= wrkbl+lda*n {
   447  							ldworkr = lda
   448  						} else {
   449  							ldworkr = n
   450  						}
   451  						itau := ir + ldworkr*n
   452  						iwork := itau + n
   453  						// Compute A = Q * R.
   454  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   455  
   456  						// Copy R to work[ir:], zeroing out below it.
   457  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
   458  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
   459  
   460  						// Generate Q in A.
   461  						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   462  						ie := itau
   463  						itauq := ie + n
   464  						itaup := itauq + n
   465  						iwork = itaup + n
   466  
   467  						// Bidiagonalize R in work[ir:].
   468  						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
   469  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   470  
   471  						// Generate left vectors bidiagonalizing R in work[ir:].
   472  						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr,
   473  							work[itauq:], work[iwork:], lwork-iwork)
   474  						iwork = ie + n
   475  
   476  						// Perform bidiagonal QR iteration, compuing left singular
   477  						// vectors of R in work[ir:].
   478  						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
   479  							work[ir:], ldworkr, work, 1, work[iwork:])
   480  
   481  						// Multiply Q in A by left singular vectors of R in
   482  						// work[ir:], storing result in U.
   483  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
   484  							work[ir:], ldworkr, 0, u, ldu)
   485  					} else {
   486  						// Insufficient workspace for a fast algorithm.
   487  						itau := 0
   488  						iwork := itau + n
   489  
   490  						// Compute A = Q*R, copying result to U.
   491  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   492  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   493  
   494  						// Generate Q in U.
   495  						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   496  						ie := itau
   497  						itauq := ie + n
   498  						itaup := itauq + n
   499  						iwork = itaup + n
   500  
   501  						// Zero out below R in A.
   502  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   503  
   504  						// Bidiagonalize R in A.
   505  						impl.Dgebrd(n, n, a, lda, s, work[ie:],
   506  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   507  
   508  						// Multiply Q in U by left vectors bidiagonalizing R.
   509  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   510  							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   511  						iwork = ie + n
   512  
   513  						// Perform bidiagonal QR iteration, computing left
   514  						// singular vectors of A in U.
   515  						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1,
   516  							u, ldu, work, 1, work[iwork:])
   517  					}
   518  				} else if wantvo {
   519  					// Path 5
   520  					panic(noSVDO)
   521  				} else if wantvas {
   522  					// Path 6
   523  					if lwork >= n*n+max(4*n, bdspac) {
   524  						// Sufficient workspace for a fast algorithm.
   525  						iu := 0
   526  						var ldworku int
   527  						if lwork >= wrkbl+lda*n {
   528  							ldworku = lda
   529  						} else {
   530  							ldworku = n
   531  						}
   532  						itau := iu + ldworku*n
   533  						iwork := itau + n
   534  
   535  						// Compute A = Q * R.
   536  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   537  						// Copy R to work[iu:], zeroing out below it.
   538  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
   539  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
   540  
   541  						// Generate Q in A.
   542  						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   543  
   544  						ie := itau
   545  						itauq := ie + n
   546  						itaup := itauq + n
   547  						iwork = itaup + n
   548  
   549  						// Bidiagonalize R in work[iu:], copying result to VT.
   550  						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
   551  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   552  						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
   553  
   554  						// Generate left bidiagonalizing vectors in work[iu:].
   555  						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku,
   556  							work[itauq:], work[iwork:], lwork-iwork)
   557  
   558  						// Generate right bidiagonalizing vectors in VT.
   559  						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   560  							work[itaup:], work[iwork:], lwork-iwork)
   561  						iwork = ie + n
   562  
   563  						// Perform bidiagonal QR iteration, computing left singular
   564  						// vectors of R in work[iu:], and computing right singular
   565  						// vectors of R in VT.
   566  						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
   567  							vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
   568  
   569  						// Multiply Q in A by left singular vectors of R in
   570  						// work[iu:], storing result in U.
   571  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
   572  							work[iu:], ldworku, 0, u, ldu)
   573  					} else {
   574  						// Insufficient workspace for a fast algorithm.
   575  						itau := 0
   576  						iwork := itau + n
   577  
   578  						// Compute A = Q * R, copying result to U.
   579  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   580  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   581  
   582  						// Generate Q in U.
   583  						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   584  
   585  						// Copy R to VT, zeroing out below it.
   586  						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   587  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
   588  
   589  						ie := itau
   590  						itauq := ie + n
   591  						itaup := itauq + n
   592  						iwork = itaup + n
   593  
   594  						// Bidiagonalize R in VT.
   595  						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   596  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   597  
   598  						// Multiply Q in U by left bidiagonalizing vectors in VT.
   599  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   600  							vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   601  
   602  						// Generate right bidiagonalizing vectors in VT.
   603  						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   604  							work[itaup:], work[iwork:], lwork-iwork)
   605  						iwork = ie + n
   606  
   607  						// Perform bidiagonal QR iteration, computing left singular
   608  						// vectors of A in U and computing right singular vectors
   609  						// of A in VT.
   610  						ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   611  							vt, ldvt, u, ldu, work, 1, work[iwork:])
   612  					}
   613  				}
   614  			} else if wantua {
   615  				if wantvn {
   616  					// Path 7
   617  					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
   618  						// Sufficient workspace for a fast algorithm.
   619  						ir := 0
   620  						var ldworkr int
   621  						if lwork >= wrkbl+lda*n {
   622  							ldworkr = lda
   623  						} else {
   624  							ldworkr = n
   625  						}
   626  						itau := ir + ldworkr*n
   627  						iwork := itau + n
   628  
   629  						// Compute A = Q*R, copying result to U.
   630  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   631  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   632  
   633  						// Copy R to work[ir:], zeroing out below it.
   634  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
   635  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
   636  
   637  						// Generate Q in U.
   638  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   639  						ie := itau
   640  						itauq := ie + n
   641  						itaup := itauq + n
   642  						iwork = itaup + n
   643  
   644  						// Bidiagonalize R in work[ir:].
   645  						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
   646  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   647  
   648  						// Generate left bidiagonalizing vectors in work[ir:].
   649  						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr,
   650  							work[itauq:], work[iwork:], lwork-iwork)
   651  						iwork = ie + n
   652  
   653  						// Perform bidiagonal QR iteration, computing left singular
   654  						// vectors of R in work[ir:].
   655  						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
   656  							work[ir:], ldworkr, work, 1, work[iwork:])
   657  
   658  						// Multiply Q in U by left singular vectors of R in
   659  						// work[ir:], storing result in A.
   660  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu,
   661  							work[ir:], ldworkr, 0, a, lda)
   662  
   663  						// Copy left singular vectors of A from A to U.
   664  						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
   665  					} else {
   666  						// Insufficient workspace for a fast algorithm.
   667  						itau := 0
   668  						iwork := itau + n
   669  
   670  						// Compute A = Q*R, copying result to U.
   671  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   672  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   673  
   674  						// Generate Q in U.
   675  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   676  						ie := itau
   677  						itauq := ie + n
   678  						itaup := itauq + n
   679  						iwork = itaup + n
   680  
   681  						// Zero out below R in A.
   682  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   683  
   684  						// Bidiagonalize R in A.
   685  						impl.Dgebrd(n, n, a, lda, s, work[ie:],
   686  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   687  
   688  						// Multiply Q in U by left bidiagonalizing vectors in A.
   689  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   690  							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   691  						iwork = ie + n
   692  
   693  						// Perform bidiagonal QR iteration, computing left
   694  						// singular vectors of A in U.
   695  						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:],
   696  							work, 1, u, ldu, work, 1, work[iwork:])
   697  					}
   698  				} else if wantvo {
   699  					// Path 8.
   700  					panic(noSVDO)
   701  				} else if wantvas {
   702  					// Path 9.
   703  					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
   704  						// Sufficient workspace for a fast algorithm.
   705  						iu := 0
   706  						var ldworku int
   707  						if lwork >= wrkbl+lda*n {
   708  							ldworku = lda
   709  						} else {
   710  							ldworku = n
   711  						}
   712  						itau := iu + ldworku*n
   713  						iwork := itau + n
   714  
   715  						// Compute A = Q * R, copying result to U.
   716  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   717  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   718  
   719  						// Generate Q in U.
   720  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   721  
   722  						// Copy R to work[iu:], zeroing out below it.
   723  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
   724  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
   725  
   726  						ie = itau
   727  						itauq := ie + n
   728  						itaup := itauq + n
   729  						iwork = itaup + n
   730  
   731  						// Bidiagonalize R in work[iu:], copying result to VT.
   732  						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
   733  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   734  						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
   735  
   736  						// Generate left bidiagonalizing vectors in work[iu:].
   737  						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku,
   738  							work[itauq:], work[iwork:], lwork-iwork)
   739  
   740  						// Generate right bidiagonalizing vectors in VT.
   741  						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   742  							work[itaup:], work[iwork:], lwork-iwork)
   743  						iwork = ie + n
   744  
   745  						// Perform bidiagonal QR iteration, computing left singular
   746  						// vectors of R in work[iu:] and computing right
   747  						// singular vectors of R in VT.
   748  						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
   749  							vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
   750  
   751  						// Multiply Q in U by left singular vectors of R in
   752  						// work[iu:], storing result in A.
   753  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1,
   754  							u, ldu, work[iu:], ldworku, 0, a, lda)
   755  
   756  						// Copy left singular vectors of A from A to U.
   757  						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
   758  
   759  						/*
   760  							// Bidiagonalize R in VT.
   761  							impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   762  								work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   763  
   764  							// Multiply Q in U by left bidiagonalizing vectors in VT.
   765  							impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
   766  								m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   767  
   768  							// Generate right bidiagonalizing vectors in VT.
   769  							impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   770  								work[itaup:], work[iwork:], lwork-iwork)
   771  							iwork = ie + n
   772  
   773  							// Perform bidiagonal QR iteration, computing left singular
   774  							// vectors of A in U and computing right singular vectors
   775  							// of A in VT.
   776  							ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   777  								vt, ldvt, u, ldu, work, 1, work[iwork:])
   778  						*/
   779  					} else {
   780  						// Insufficient workspace for a fast algorithm.
   781  						itau := 0
   782  						iwork := itau + n
   783  
   784  						// Compute A = Q*R, copying result to U.
   785  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   786  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   787  
   788  						// Generate Q in U.
   789  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   790  
   791  						// Copy R from A to VT, zeroing out below it.
   792  						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   793  						if n > 1 {
   794  							impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
   795  						}
   796  
   797  						ie := itau
   798  						itauq := ie + n
   799  						itaup := itauq + n
   800  						iwork = itaup + n
   801  
   802  						// Bidiagonalize R in VT.
   803  						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   804  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   805  
   806  						// Multiply Q in U by left bidiagonalizing vectors in VT.
   807  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
   808  							m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   809  
   810  						// Generate right bidiagonizing vectors in VT.
   811  						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   812  							work[itaup:], work[iwork:], lwork-iwork)
   813  						iwork = ie + n
   814  
   815  						// Perform bidiagonal QR iteration, computing left singular
   816  						// vectors of A in U and computing right singular vectors
   817  						// of A in VT.
   818  						ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   819  							vt, ldvt, u, ldu, work, 1, work[iwork:])
   820  					}
   821  				}
   822  			}
   823  		} else {
   824  			// Path 10.
   825  			// M at least N, but not much larger.
   826  			ie = 0
   827  			itauq := ie + n
   828  			itaup := itauq + n
   829  			iwork := itaup + n
   830  
   831  			// Bidiagonalize A.
   832  			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:],
   833  				work[itaup:], work[iwork:], lwork-iwork)
   834  			if wantuas {
   835  				// Left singular vectors are desired in U. Copy result to U and
   836  				// generate left biadiagonalizing vectors in U.
   837  				impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   838  				var ncu int
   839  				if wantus {
   840  					ncu = n
   841  				}
   842  				if wantua {
   843  					ncu = m
   844  				}
   845  				impl.Dorgbr(lapack.GenerateQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
   846  			}
   847  			if wantvas {
   848  				// Right singular vectors are desired in VT. Copy result to VT and
   849  				// generate left biadiagonalizing vectors in VT.
   850  				impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   851  				impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
   852  			}
   853  			if wantuo {
   854  				panic(noSVDO)
   855  			}
   856  			if wantvo {
   857  				panic(noSVDO)
   858  			}
   859  			iwork = ie + n
   860  			var nru, ncvt int
   861  			if wantuas || wantuo {
   862  				nru = m
   863  			}
   864  			if wantun {
   865  				nru = 0
   866  			}
   867  			if wantvas || wantvo {
   868  				ncvt = n
   869  			}
   870  			if wantvn {
   871  				ncvt = 0
   872  			}
   873  			if !wantuo && !wantvo {
   874  				// Perform bidiagonal QR iteration, if desired, computing left
   875  				// singular vectors in U and right singular vectors in VT.
   876  				ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:],
   877  					vt, ldvt, u, ldu, work, 1, work[iwork:])
   878  			} else {
   879  				// There will be two branches when the implementation is complete.
   880  				panic(noSVDO)
   881  			}
   882  		}
   883  	} else {
   884  		// A has more columns than rows. If A has sufficiently more columns than
   885  		// rows, first reduce using the LQ decomposition.
   886  		if n >= mnthr {
   887  			// n >> m.
   888  			if wantvn {
   889  				// Path 1t.
   890  				itau := 0
   891  				iwork := itau + m
   892  
   893  				// Compute A = L*Q.
   894  				impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   895  
   896  				// Zero out above L.
   897  				impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
   898  				ie := 0
   899  				itauq := ie + m
   900  				itaup := itauq + m
   901  				iwork = itaup + m
   902  
   903  				// Bidiagonalize L in A.
   904  				impl.Dgebrd(m, m, a, lda, s, work[ie:itauq],
   905  					work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork)
   906  				if wantuo || wantuas {
   907  					impl.Dorgbr(lapack.GenerateQ, m, m, m, a, lda,
   908  						work[itauq:], work[iwork:], lwork-iwork)
   909  				}
   910  				iwork = ie + m
   911  				nru := 0
   912  				if wantuo || wantuas {
   913  					nru = m
   914  				}
   915  
   916  				// Perform bidiagonal QR iteration, computing left singular vectors
   917  				// of A in A if desired.
   918  				ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:],
   919  					work, 1, a, lda, work, 1, work[iwork:])
   920  
   921  				// If left singular vectors desired in U, copy them there.
   922  				if wantuas {
   923  					impl.Dlacpy(blas.All, m, m, a, lda, u, ldu)
   924  				}
   925  			} else if wantvo && wantun {
   926  				// Path 2t.
   927  				panic(noSVDO)
   928  			} else if wantvo && wantuas {
   929  				// Path 3t.
   930  				panic(noSVDO)
   931  			} else if wantvs {
   932  				if wantun {
   933  					// Path 4t.
   934  					if lwork >= m*m+max(4*m, bdspac) {
   935  						// Sufficient workspace for a fast algorithm.
   936  						ir := 0
   937  						var ldworkr int
   938  						if lwork >= wrkbl+lda*m {
   939  							ldworkr = lda
   940  						} else {
   941  							ldworkr = m
   942  						}
   943  						itau := ir + ldworkr*m
   944  						iwork := itau + m
   945  
   946  						// Compute A = L*Q.
   947  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   948  
   949  						// Copy L to work[ir:], zeroing out above it.
   950  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
   951  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
   952  
   953  						// Generate Q in A.
   954  						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
   955  						ie := itau
   956  						itauq := ie + m
   957  						itaup := itauq + m
   958  						iwork = itaup + m
   959  
   960  						// Bidiagonalize L in work[ir:].
   961  						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
   962  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   963  
   964  						// Generate right vectors bidiagonalizing L in work[ir:].
   965  						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr,
   966  							work[itaup:], work[iwork:], lwork-iwork)
   967  						iwork = ie + m
   968  
   969  						// Perform bidiagonal QR iteration, computing right singular
   970  						// vectors of L in work[ir:].
   971  						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
   972  							work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
   973  
   974  						// Multiply right singular vectors of L in work[ir:] by
   975  						// Q in A, storing result in VT.
   976  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
   977  							work[ir:], ldworkr, a, lda, 0, vt, ldvt)
   978  					} else {
   979  						// Insufficient workspace for a fast algorithm.
   980  						itau := 0
   981  						iwork := itau + m
   982  
   983  						// Compute A = L*Q.
   984  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   985  
   986  						// Copy result to VT.
   987  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
   988  
   989  						// Generate Q in VT.
   990  						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
   991  						ie := itau
   992  						itauq := ie + m
   993  						itaup := itauq + m
   994  						iwork = itaup + m
   995  
   996  						// Zero out above L in A.
   997  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
   998  
   999  						// Bidiagonalize L in A.
  1000  						impl.Dgebrd(m, m, a, lda, s, work[ie:],
  1001  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1002  
  1003  						// Multiply right vectors bidiagonalizing L by Q in VT.
  1004  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1005  							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1006  						iwork = ie + m
  1007  
  1008  						// Perform bidiagonal QR iteration, computing right
  1009  						// singular vectors of A in VT.
  1010  						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
  1011  							vt, ldvt, work, 1, work, 1, work[iwork:])
  1012  					}
  1013  				} else if wantuo {
  1014  					// Path 5t.
  1015  					panic(noSVDO)
  1016  				} else if wantuas {
  1017  					// Path 6t.
  1018  					if lwork >= m*m+max(4*m, bdspac) {
  1019  						// Sufficient workspace for a fast algorithm.
  1020  						iu := 0
  1021  						var ldworku int
  1022  						if lwork >= wrkbl+lda*m {
  1023  							ldworku = lda
  1024  						} else {
  1025  							ldworku = m
  1026  						}
  1027  						itau := iu + ldworku*m
  1028  						iwork := itau + m
  1029  
  1030  						// Compute A = L*Q.
  1031  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1032  
  1033  						// Copy L to work[iu:], zeroing out above it.
  1034  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
  1035  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
  1036  
  1037  						// Generate Q in A.
  1038  						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1039  						ie := itau
  1040  						itauq := ie + m
  1041  						itaup := itauq + m
  1042  						iwork = itaup + m
  1043  
  1044  						// Bidiagonalize L in work[iu:], copying result to U.
  1045  						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
  1046  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1047  						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
  1048  
  1049  						// Generate right bidiagionalizing vectors in work[iu:].
  1050  						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku,
  1051  							work[itaup:], work[iwork:], lwork-iwork)
  1052  
  1053  						// Generate left bidiagonalizing vectors in U.
  1054  						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1055  						iwork = ie + m
  1056  
  1057  						// Perform bidiagonal QR iteration, computing left singular
  1058  						// vectors of L in U and computing right singular vectors of
  1059  						// L in work[iu:].
  1060  						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
  1061  							work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
  1062  
  1063  						// Multiply right singular vectors of L in work[iu:] by
  1064  						// Q in A, storing result in VT.
  1065  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1066  							work[iu:], ldworku, a, lda, 0, vt, ldvt)
  1067  					} else {
  1068  						// Insufficient workspace for a fast algorithm.
  1069  						itau := 0
  1070  						iwork := itau + m
  1071  
  1072  						// Compute A = L*Q, copying result to VT.
  1073  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1074  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1075  
  1076  						// Generate Q in VT.
  1077  						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1078  
  1079  						// Copy L to U, zeroing out above it.
  1080  						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1081  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
  1082  
  1083  						ie := itau
  1084  						itauq := ie + m
  1085  						itaup := itauq + m
  1086  						iwork = itaup + m
  1087  
  1088  						// Bidiagonalize L in U.
  1089  						impl.Dgebrd(m, m, u, ldu, s, work[ie:],
  1090  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1091  
  1092  						// Multiply right bidiagonalizing vectors in U by Q in VT.
  1093  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1094  							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1095  
  1096  						// Generate left bidiagonalizing vectors in U.
  1097  						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1098  						iwork = ie + m
  1099  
  1100  						// Perform bidiagonal QR iteration, computing left singular
  1101  						// vectors of A in U and computing right singular vectors
  1102  						// of A in VT.
  1103  						ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
  1104  							u, ldu, work, 1, work[iwork:])
  1105  					}
  1106  				}
  1107  			} else if wantva {
  1108  				if wantun {
  1109  					// Path 7t.
  1110  					if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
  1111  						// Sufficient workspace for a fast algorithm.
  1112  						ir := 0
  1113  						var ldworkr int
  1114  						if lwork >= wrkbl+lda*m {
  1115  							ldworkr = lda
  1116  						} else {
  1117  							ldworkr = m
  1118  						}
  1119  						itau := ir + ldworkr*m
  1120  						iwork := itau + m
  1121  
  1122  						// Compute A = L*Q, copying result to VT.
  1123  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1124  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1125  
  1126  						// Copy L to work[ir:], zeroing out above it.
  1127  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
  1128  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
  1129  
  1130  						// Generate Q in VT.
  1131  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1132  
  1133  						ie := itau
  1134  						itauq := ie + m
  1135  						itaup := itauq + m
  1136  						iwork = itaup + m
  1137  
  1138  						// Bidiagonalize L in work[ir:].
  1139  						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
  1140  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1141  
  1142  						// Generate right bidiagonalizing vectors in work[ir:].
  1143  						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr,
  1144  							work[itaup:], work[iwork:], lwork-iwork)
  1145  						iwork = ie + m
  1146  
  1147  						// Perform bidiagonal QR iteration, computing right
  1148  						// singular vectors of L in work[ir:].
  1149  						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
  1150  							work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
  1151  
  1152  						// Multiply right singular vectors of L in work[ir:] by
  1153  						// Q in VT, storing result in A.
  1154  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1155  							work[ir:], ldworkr, vt, ldvt, 0, a, lda)
  1156  
  1157  						// Copy right singular vectors of A from A to VT.
  1158  						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
  1159  					} else {
  1160  						// Insufficient workspace for a fast algorithm.
  1161  						itau := 0
  1162  						iwork := itau + m
  1163  						// Compute A = L * Q, copying result to VT.
  1164  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1165  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1166  
  1167  						// Generate Q in VT.
  1168  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1169  
  1170  						ie := itau
  1171  						itauq := ie + m
  1172  						itaup := itauq + m
  1173  						iwork = itaup + m
  1174  
  1175  						// Zero out above L in A.
  1176  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
  1177  
  1178  						// Bidiagonalize L in A.
  1179  						impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:],
  1180  							work[itaup:], work[iwork:], lwork-iwork)
  1181  
  1182  						// Multiply right bidiagonalizing vectors in A by Q in VT.
  1183  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1184  							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1185  						iwork = ie + m
  1186  
  1187  						// Perform bidiagonal QR iteration, computing right singular
  1188  						// vectors of A in VT.
  1189  						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
  1190  							vt, ldvt, work, 1, work, 1, work[iwork:])
  1191  					}
  1192  				} else if wantuo {
  1193  					panic(noSVDO)
  1194  				} else if wantuas {
  1195  					// Path 9t.
  1196  					if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
  1197  						// Sufficient workspace for a fast algorithm.
  1198  						iu := 0
  1199  
  1200  						var ldworku int
  1201  						if lwork >= wrkbl+lda*m {
  1202  							ldworku = lda
  1203  						} else {
  1204  							ldworku = m
  1205  						}
  1206  						itau := iu + ldworku*m
  1207  						iwork := itau + m
  1208  
  1209  						// Generate A = L * Q copying result to VT.
  1210  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1211  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1212  
  1213  						// Generate Q in VT.
  1214  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1215  
  1216  						// Copy L to work[iu:], zeroing out above it.
  1217  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
  1218  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
  1219  						ie = itau
  1220  						itauq := ie + m
  1221  						itaup := itauq + m
  1222  						iwork = itaup + m
  1223  
  1224  						// Bidiagonalize L in work[iu:], copying result to U.
  1225  						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
  1226  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1227  						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
  1228  
  1229  						// Generate right bidiagonalizing vectors in work[iu:].
  1230  						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku,
  1231  							work[itaup:], work[iwork:], lwork-iwork)
  1232  
  1233  						// Generate left bidiagonalizing vectors in U.
  1234  						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1235  						iwork = ie + m
  1236  
  1237  						// Perform bidiagonal QR iteration, computing left singular
  1238  						// vectors of L in U and computing right singular vectors
  1239  						// of L in work[iu:].
  1240  						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
  1241  							work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
  1242  
  1243  						// Multiply right singular vectors of L in work[iu:]
  1244  						// Q in VT, storing result in A.
  1245  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1246  							work[iu:], ldworku, vt, ldvt, 0, a, lda)
  1247  
  1248  						// Copy right singular vectors of A from A to VT.
  1249  						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
  1250  					} else {
  1251  						// Insufficient workspace for a fast algorithm.
  1252  						itau := 0
  1253  						iwork := itau + m
  1254  
  1255  						// Compute A = L * Q, copying result to VT.
  1256  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1257  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1258  
  1259  						// Generate Q in VT.
  1260  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1261  
  1262  						// Copy L to U, zeroing out above it.
  1263  						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1264  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
  1265  
  1266  						ie = itau
  1267  						itauq := ie + m
  1268  						itaup := itauq + m
  1269  						iwork = itaup + m
  1270  
  1271  						// Bidiagonalize L in U.
  1272  						impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
  1273  							work[itaup:], work[iwork:], lwork-iwork)
  1274  
  1275  						// Multiply right bidiagonalizing vectors in U by Q in VT.
  1276  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1277  							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1278  
  1279  						// Generate left bidiagonalizing vectors in U.
  1280  						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1281  						iwork = ie + m
  1282  
  1283  						// Perform bidiagonal QR iteration, computing left singular
  1284  						// vectors of A in U and computing right singular vectors
  1285  						// of A in VT.
  1286  						ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
  1287  							vt, ldvt, u, ldu, work, 1, work[iwork:])
  1288  					}
  1289  				}
  1290  			}
  1291  		} else {
  1292  			// Path 10t.
  1293  			// N at least M, but not much larger.
  1294  			ie = 0
  1295  			itauq := ie + m
  1296  			itaup := itauq + m
  1297  			iwork := itaup + m
  1298  
  1299  			// Bidiagonalize A.
  1300  			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1301  			if wantuas {
  1302  				// If left singular vectors desired in U, copy result to U and
  1303  				// generate left bidiagonalizing vectors in U.
  1304  				impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1305  				impl.Dorgbr(lapack.GenerateQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1306  			}
  1307  			if wantvas {
  1308  				// If right singular vectors desired in VT, copy result to VT
  1309  				// and generate right bidiagonalizing vectors in VT.
  1310  				impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1311  				var nrvt int
  1312  				if wantva {
  1313  					nrvt = n
  1314  				} else {
  1315  					nrvt = m
  1316  				}
  1317  				impl.Dorgbr(lapack.GeneratePT, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
  1318  			}
  1319  			if wantuo {
  1320  				panic(noSVDO)
  1321  			}
  1322  			if wantvo {
  1323  				panic(noSVDO)
  1324  			}
  1325  			iwork = ie + m
  1326  			var nru, ncvt int
  1327  			if wantuas || wantuo {
  1328  				nru = m
  1329  			}
  1330  			if wantvas || wantvo {
  1331  				ncvt = n
  1332  			}
  1333  			if !wantuo && !wantvo {
  1334  				// Perform bidiagonal QR iteration, if desired, computing left
  1335  				// singular vectors in U and computing right singular vectors in
  1336  				// VT.
  1337  				ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
  1338  					vt, ldvt, u, ldu, work, 1, work[iwork:])
  1339  			} else {
  1340  				// There will be two branches when the implementation is complete.
  1341  				panic(noSVDO)
  1342  			}
  1343  		}
  1344  	}
  1345  	if !ok {
  1346  		if ie > 1 {
  1347  			for i := 0; i < minmn-1; i++ {
  1348  				work[i+1] = work[i+ie]
  1349  			}
  1350  		}
  1351  		if ie < 1 {
  1352  			for i := minmn - 2; i >= 0; i-- {
  1353  				work[i+1] = work[i+ie]
  1354  			}
  1355  		}
  1356  	}
  1357  	// Undo scaling if necessary.
  1358  	if iscl {
  1359  		if anrm > bignum {
  1360  			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn, s, minmn)
  1361  		}
  1362  		if !ok && anrm > bignum {
  1363  			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn-1, work[1:], minmn)
  1364  		}
  1365  		if anrm < smlnum {
  1366  			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn, s, minmn)
  1367  		}
  1368  		if !ok && anrm < smlnum {
  1369  			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn-1, work[1:], minmn)
  1370  		}
  1371  	}
  1372  	work[0] = float64(maxwrk)
  1373  	return ok
  1374  }