gonum.org/v1/gonum@v0.14.0/lapack/gonum/dgesvd.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"math"
     9  
    10  	"gonum.org/v1/gonum/blas"
    11  	"gonum.org/v1/gonum/blas/blas64"
    12  	"gonum.org/v1/gonum/lapack"
    13  )
    14  
    15  const noSVDO = "dgesvd: not coded for overwrite"
    16  
    17  // Dgesvd computes the singular value decomposition of the input matrix A.
    18  //
    19  // The singular value decomposition is
    20  //
    21  //	A = U * Sigma * Vᵀ
    22  //
    23  // where Sigma is an m×n diagonal matrix containing the singular values of A,
    24  // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
    25  // min(m,n) columns of U and V are the left and right singular vectors of A
    26  // respectively.
    27  //
    28  // jobU and jobVT are options for computing the singular vectors. The behavior
    29  // is as follows
    30  //
    31  //	jobU == lapack.SVDAll       All m columns of U are returned in u
    32  //	jobU == lapack.SVDStore     The first min(m,n) columns are returned in u
    33  //	jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
    34  //	jobU == lapack.SVDNone      The columns of U are not computed.
    35  //
    36  // The behavior is the same for jobVT and the rows of Vᵀ. At most one of jobU
    37  // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
    38  //
    39  // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
    40  // the data is overwritten. On exit, A contains the appropriate singular vectors
    41  // if either job is lapack.SVDOverwrite.
    42  //
    43  // s is a slice of length at least min(m,n) and on exit contains the singular
    44  // values in decreasing order.
    45  //
    46  // u contains the left singular vectors on exit, stored column-wise. If
    47  // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDStore u is
    48  // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
    49  // not used.
    50  //
    51  // vt contains the left singular vectors on exit, stored row-wise. If
    52  // jobV == lapack.SVDAll, vt is of size n×n. If jobVT == lapack.SVDStore vt is
    53  // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
    54  // not used.
    55  //
    56  // work is a slice for storing temporary memory, and lwork is the usable size of
    57  // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
    58  // If lwork == -1, instead of performing Dgesvd, the optimal work length will be
    59  // stored into work[0]. Dgesvd will panic if the working memory has insufficient
    60  // storage.
    61  //
    62  // Dgesvd returns whether the decomposition successfully completed.
    63  func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
    64  	if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
    65  		panic(noSVDO)
    66  	}
    67  
    68  	wantua := jobU == lapack.SVDAll
    69  	wantus := jobU == lapack.SVDStore
    70  	wantuas := wantua || wantus
    71  	wantuo := jobU == lapack.SVDOverwrite
    72  	wantun := jobU == lapack.SVDNone
    73  	if !(wantua || wantus || wantuo || wantun) {
    74  		panic(badSVDJob)
    75  	}
    76  
    77  	wantva := jobVT == lapack.SVDAll
    78  	wantvs := jobVT == lapack.SVDStore
    79  	wantvas := wantva || wantvs
    80  	wantvo := jobVT == lapack.SVDOverwrite
    81  	wantvn := jobVT == lapack.SVDNone
    82  	if !(wantva || wantvs || wantvo || wantvn) {
    83  		panic(badSVDJob)
    84  	}
    85  
    86  	if wantuo && wantvo {
    87  		panic(bothSVDOver)
    88  	}
    89  
    90  	minmn := min(m, n)
    91  	minwork := 1
    92  	if minmn > 0 {
    93  		minwork = max(3*minmn+max(m, n), 5*minmn)
    94  	}
    95  	switch {
    96  	case m < 0:
    97  		panic(mLT0)
    98  	case n < 0:
    99  		panic(nLT0)
   100  	case lda < max(1, n):
   101  		panic(badLdA)
   102  	case ldu < 1, wantua && ldu < m, wantus && ldu < minmn:
   103  		panic(badLdU)
   104  	case ldvt < 1 || (wantvas && ldvt < n):
   105  		panic(badLdVT)
   106  	case lwork < minwork && lwork != -1:
   107  		panic(badLWork)
   108  	case len(work) < max(1, lwork):
   109  		panic(shortWork)
   110  	}
   111  
   112  	// Quick return if possible.
   113  	if minmn == 0 {
   114  		work[0] = 1
   115  		return true
   116  	}
   117  
   118  	// Compute optimal workspace size for subroutines.
   119  	opts := string(jobU) + string(jobVT)
   120  	mnthr := impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
   121  	maxwrk := 1
   122  	var wrkbl, bdspac int
   123  	if m >= n {
   124  		bdspac = 5 * n
   125  		impl.Dgeqrf(m, n, a, lda, nil, work, -1)
   126  		lwork_dgeqrf := int(work[0])
   127  
   128  		impl.Dorgqr(m, n, n, a, lda, nil, work, -1)
   129  		lwork_dorgqr_n := int(work[0])
   130  		impl.Dorgqr(m, m, n, a, lda, nil, work, -1)
   131  		lwork_dorgqr_m := int(work[0])
   132  
   133  		impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1)
   134  		lwork_dgebrd := int(work[0])
   135  
   136  		impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, nil, work, -1)
   137  		lwork_dorgbr_p := int(work[0])
   138  
   139  		impl.Dorgbr(lapack.GenerateQ, n, n, n, a, lda, nil, work, -1)
   140  		lwork_dorgbr_q := int(work[0])
   141  
   142  		if m >= mnthr {
   143  			if wantun {
   144  				// Path 1 (m much larger than n, jobU == None)
   145  				maxwrk = n + lwork_dgeqrf
   146  				maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
   147  				if wantvo || wantvas {
   148  					maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
   149  				}
   150  				maxwrk = max(maxwrk, bdspac)
   151  			} else if wantuo && wantvn {
   152  				// Path 2 (m much larger than n, jobU == Overwrite, jobVT == None)
   153  				wrkbl = n + lwork_dgeqrf
   154  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   155  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   156  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   157  				wrkbl = max(wrkbl, bdspac)
   158  				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
   159  			} else if wantuo && wantvas {
   160  				// Path 3 (m much larger than n, jobU == Overwrite, jobVT == Store or All)
   161  				wrkbl = n + lwork_dgeqrf
   162  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   163  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   164  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   165  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   166  				wrkbl = max(wrkbl, bdspac)
   167  				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
   168  			} else if wantus && wantvn {
   169  				// Path 4 (m much larger than n, jobU == Store, jobVT == None)
   170  				wrkbl = n + lwork_dgeqrf
   171  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   172  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   173  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   174  				wrkbl = max(wrkbl, bdspac)
   175  				maxwrk = n*n + wrkbl
   176  			} else if wantus && wantvo {
   177  				// Path 5 (m much larger than n, jobU == Store, jobVT == Overwrite)
   178  				wrkbl = n + lwork_dgeqrf
   179  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   180  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   181  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   182  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   183  				wrkbl = max(wrkbl, bdspac)
   184  				maxwrk = 2*n*n + wrkbl
   185  			} else if wantus && wantvas {
   186  				// Path 6 (m much larger than n, jobU == Store, jobVT == Store or All)
   187  				wrkbl = n + lwork_dgeqrf
   188  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   189  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   190  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   191  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   192  				wrkbl = max(wrkbl, bdspac)
   193  				maxwrk = n*n + wrkbl
   194  			} else if wantua && wantvn {
   195  				// Path 7 (m much larger than n, jobU == All, jobVT == None)
   196  				wrkbl = n + lwork_dgeqrf
   197  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   198  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   199  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   200  				wrkbl = max(wrkbl, bdspac)
   201  				maxwrk = n*n + wrkbl
   202  			} else if wantua && wantvo {
   203  				// Path 8 (m much larger than n, jobU == All, jobVT == Overwrite)
   204  				wrkbl = n + lwork_dgeqrf
   205  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   206  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   207  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   208  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   209  				wrkbl = max(wrkbl, bdspac)
   210  				maxwrk = 2*n*n + wrkbl
   211  			} else if wantua && wantvas {
   212  				// Path 9 (m much larger than n, jobU == All, jobVT == Store or All)
   213  				wrkbl = n + lwork_dgeqrf
   214  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   215  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   216  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   217  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   218  				wrkbl = max(wrkbl, bdspac)
   219  				maxwrk = n*n + wrkbl
   220  			}
   221  		} else {
   222  			// Path 10 (m at least n, but not much larger)
   223  			impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
   224  			lwork_dgebrd := int(work[0])
   225  			maxwrk = 3*n + lwork_dgebrd
   226  			if wantus || wantuo {
   227  				impl.Dorgbr(lapack.GenerateQ, m, n, n, a, lda, nil, work, -1)
   228  				lwork_dorgbr_q = int(work[0])
   229  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
   230  			}
   231  			if wantua {
   232  				impl.Dorgbr(lapack.GenerateQ, m, m, n, a, lda, nil, work, -1)
   233  				lwork_dorgbr_q := int(work[0])
   234  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
   235  			}
   236  			if !wantvn {
   237  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
   238  			}
   239  			maxwrk = max(maxwrk, bdspac)
   240  		}
   241  	} else {
   242  		bdspac = 5 * m
   243  
   244  		impl.Dgelqf(m, n, a, lda, nil, work, -1)
   245  		lwork_dgelqf := int(work[0])
   246  
   247  		impl.Dorglq(n, n, m, nil, n, nil, work, -1)
   248  		lwork_dorglq_n := int(work[0])
   249  		impl.Dorglq(m, n, m, a, lda, nil, work, -1)
   250  		lwork_dorglq_m := int(work[0])
   251  
   252  		impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1)
   253  		lwork_dgebrd := int(work[0])
   254  
   255  		impl.Dorgbr(lapack.GeneratePT, m, m, m, a, n, nil, work, -1)
   256  		lwork_dorgbr_p := int(work[0])
   257  
   258  		impl.Dorgbr(lapack.GenerateQ, m, m, m, a, n, nil, work, -1)
   259  		lwork_dorgbr_q := int(work[0])
   260  
   261  		if n >= mnthr {
   262  			if wantvn {
   263  				// Path 1t (n much larger than m, jobVT == None)
   264  				maxwrk = m + lwork_dgelqf
   265  				maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
   266  				if wantuo || wantuas {
   267  					maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
   268  				}
   269  				maxwrk = max(maxwrk, bdspac)
   270  			} else if wantvo && wantun {
   271  				// Path 2t (n much larger than m, jobU == None, jobVT == Overwrite)
   272  				wrkbl = m + lwork_dgelqf
   273  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   274  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   275  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   276  				wrkbl = max(wrkbl, bdspac)
   277  				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
   278  			} else if wantvo && wantuas {
   279  				// Path 3t (n much larger than m, jobU == Store or All, jobVT == Overwrite)
   280  				wrkbl = m + lwork_dgelqf
   281  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   282  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   283  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   284  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   285  				wrkbl = max(wrkbl, bdspac)
   286  				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
   287  			} else if wantvs && wantun {
   288  				// Path 4t (n much larger than m, jobU == None, jobVT == Store)
   289  				wrkbl = m + lwork_dgelqf
   290  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   291  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   292  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   293  				wrkbl = max(wrkbl, bdspac)
   294  				maxwrk = m*m + wrkbl
   295  			} else if wantvs && wantuo {
   296  				// Path 5t (n much larger than m, jobU == Overwrite, jobVT == Store)
   297  				wrkbl = m + lwork_dgelqf
   298  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   299  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   300  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   301  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   302  				wrkbl = max(wrkbl, bdspac)
   303  				maxwrk = 2*m*m + wrkbl
   304  			} else if wantvs && wantuas {
   305  				// Path 6t (n much larger than m, jobU == Store or All, jobVT == Store)
   306  				wrkbl = m + lwork_dgelqf
   307  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   308  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   309  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   310  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   311  				wrkbl = max(wrkbl, bdspac)
   312  				maxwrk = m*m + wrkbl
   313  			} else if wantva && wantun {
   314  				// Path 7t (n much larger than m, jobU== None, jobVT == All)
   315  				wrkbl = m + lwork_dgelqf
   316  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   317  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   318  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   319  				wrkbl = max(wrkbl, bdspac)
   320  				maxwrk = m*m + wrkbl
   321  			} else if wantva && wantuo {
   322  				// Path 8t (n much larger than m, jobU == Overwrite, jobVT == All)
   323  				wrkbl = m + lwork_dgelqf
   324  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   325  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   326  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   327  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   328  				wrkbl = max(wrkbl, bdspac)
   329  				maxwrk = 2*m*m + wrkbl
   330  			} else if wantva && wantuas {
   331  				// Path 9t (n much larger than m, jobU == Store or All, jobVT == All)
   332  				wrkbl = m + lwork_dgelqf
   333  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   334  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   335  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   336  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   337  				wrkbl = max(wrkbl, bdspac)
   338  				maxwrk = m*m + wrkbl
   339  			}
   340  		} else {
   341  			// Path 10t (n greater than m, but not much larger)
   342  			impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
   343  			lwork_dgebrd = int(work[0])
   344  			maxwrk = 3*m + lwork_dgebrd
   345  			if wantvs || wantvo {
   346  				impl.Dorgbr(lapack.GeneratePT, m, n, m, a, n, nil, work, -1)
   347  				lwork_dorgbr_p = int(work[0])
   348  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
   349  			}
   350  			if wantva {
   351  				impl.Dorgbr(lapack.GeneratePT, n, n, m, a, n, nil, work, -1)
   352  				lwork_dorgbr_p = int(work[0])
   353  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
   354  			}
   355  			if !wantun {
   356  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
   357  			}
   358  			maxwrk = max(maxwrk, bdspac)
   359  		}
   360  	}
   361  
   362  	maxwrk = max(maxwrk, minwork)
   363  	if lwork == -1 {
   364  		work[0] = float64(maxwrk)
   365  		return true
   366  	}
   367  
   368  	if len(a) < (m-1)*lda+n {
   369  		panic(shortA)
   370  	}
   371  	if len(s) < minmn {
   372  		panic(shortS)
   373  	}
   374  	if (len(u) < (m-1)*ldu+m && wantua) || (len(u) < (m-1)*ldu+minmn && wantus) {
   375  		panic(shortU)
   376  	}
   377  	if (len(vt) < (n-1)*ldvt+n && wantva) || (len(vt) < (minmn-1)*ldvt+n && wantvs) {
   378  		panic(shortVT)
   379  	}
   380  
   381  	// Perform decomposition.
   382  	eps := dlamchE
   383  	smlnum := math.Sqrt(dlamchS) / eps
   384  	bignum := 1 / smlnum
   385  
   386  	// Scale A if max element outside range [smlnum, bignum].
   387  	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
   388  	var iscl bool
   389  	if anrm > 0 && anrm < smlnum {
   390  		iscl = true
   391  		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
   392  	} else if anrm > bignum {
   393  		iscl = true
   394  		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
   395  	}
   396  
   397  	bi := blas64.Implementation()
   398  	var ie int
   399  	if m >= n {
   400  		// If A has sufficiently more rows than columns, use the QR decomposition.
   401  		if m >= mnthr {
   402  			// m >> n
   403  			if wantun {
   404  				// Path 1.
   405  				itau := 0
   406  				iwork := itau + n
   407  
   408  				// Compute A = Q * R.
   409  				impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   410  
   411  				// Zero out below R.
   412  				impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   413  				ie = 0
   414  				itauq := ie + n
   415  				itaup := itauq + n
   416  				iwork = itaup + n
   417  				// Bidiagonalize R in A.
   418  				impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:],
   419  					work[itaup:], work[iwork:], lwork-iwork)
   420  				ncvt := 0
   421  				if wantvo || wantvas {
   422  					impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, work[itaup:],
   423  						work[iwork:], lwork-iwork)
   424  					ncvt = n
   425  				}
   426  				iwork = ie + n
   427  
   428  				// Perform bidiagonal QR iteration computing right singular vectors
   429  				// of A in A if desired.
   430  				ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:],
   431  					a, lda, work, 1, work, 1, work[iwork:])
   432  
   433  				// If right singular vectors desired in VT, copy them there.
   434  				if wantvas {
   435  					impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
   436  				}
   437  			} else if wantuo && wantvn {
   438  				// Path 2
   439  				panic(noSVDO)
   440  			} else if wantuo && wantvas {
   441  				// Path 3
   442  				panic(noSVDO)
   443  			} else if wantus {
   444  				if wantvn {
   445  					// Path 4
   446  					if lwork >= n*n+max(4*n, bdspac) {
   447  						// Sufficient workspace for a fast algorithm.
   448  						ir := 0
   449  						var ldworkr int
   450  						if lwork >= wrkbl+lda*n {
   451  							ldworkr = lda
   452  						} else {
   453  							ldworkr = n
   454  						}
   455  						itau := ir + ldworkr*n
   456  						iwork := itau + n
   457  						// Compute A = Q * R.
   458  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   459  
   460  						// Copy R to work[ir:], zeroing out below it.
   461  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
   462  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
   463  
   464  						// Generate Q in A.
   465  						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   466  						ie := itau
   467  						itauq := ie + n
   468  						itaup := itauq + n
   469  						iwork = itaup + n
   470  
   471  						// Bidiagonalize R in work[ir:].
   472  						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
   473  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   474  
   475  						// Generate left vectors bidiagonalizing R in work[ir:].
   476  						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr,
   477  							work[itauq:], work[iwork:], lwork-iwork)
   478  						iwork = ie + n
   479  
   480  						// Perform bidiagonal QR iteration, computing left singular
   481  						// vectors of R in work[ir:].
   482  						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
   483  							work[ir:], ldworkr, work, 1, work[iwork:])
   484  
   485  						// Multiply Q in A by left singular vectors of R in
   486  						// work[ir:], storing result in U.
   487  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
   488  							work[ir:], ldworkr, 0, u, ldu)
   489  					} else {
   490  						// Insufficient workspace for a fast algorithm.
   491  						itau := 0
   492  						iwork := itau + n
   493  
   494  						// Compute A = Q*R, copying result to U.
   495  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   496  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   497  
   498  						// Generate Q in U.
   499  						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   500  						ie := itau
   501  						itauq := ie + n
   502  						itaup := itauq + n
   503  						iwork = itaup + n
   504  
   505  						// Zero out below R in A.
   506  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   507  
   508  						// Bidiagonalize R in A.
   509  						impl.Dgebrd(n, n, a, lda, s, work[ie:],
   510  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   511  
   512  						// Multiply Q in U by left vectors bidiagonalizing R.
   513  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   514  							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   515  						iwork = ie + n
   516  
   517  						// Perform bidiagonal QR iteration, computing left
   518  						// singular vectors of A in U.
   519  						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1,
   520  							u, ldu, work, 1, work[iwork:])
   521  					}
   522  				} else if wantvo {
   523  					// Path 5
   524  					panic(noSVDO)
   525  				} else if wantvas {
   526  					// Path 6
   527  					if lwork >= n*n+max(4*n, bdspac) {
   528  						// Sufficient workspace for a fast algorithm.
   529  						iu := 0
   530  						var ldworku int
   531  						if lwork >= wrkbl+lda*n {
   532  							ldworku = lda
   533  						} else {
   534  							ldworku = n
   535  						}
   536  						itau := iu + ldworku*n
   537  						iwork := itau + n
   538  
   539  						// Compute A = Q * R.
   540  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   541  						// Copy R to work[iu:], zeroing out below it.
   542  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
   543  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
   544  
   545  						// Generate Q in A.
   546  						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   547  
   548  						ie := itau
   549  						itauq := ie + n
   550  						itaup := itauq + n
   551  						iwork = itaup + n
   552  
   553  						// Bidiagonalize R in work[iu:], copying result to VT.
   554  						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
   555  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   556  						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
   557  
   558  						// Generate left bidiagonalizing vectors in work[iu:].
   559  						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku,
   560  							work[itauq:], work[iwork:], lwork-iwork)
   561  
   562  						// Generate right bidiagonalizing vectors in VT.
   563  						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   564  							work[itaup:], work[iwork:], lwork-iwork)
   565  						iwork = ie + n
   566  
   567  						// Perform bidiagonal QR iteration, computing left singular
   568  						// vectors of R in work[iu:], and computing right singular
   569  						// vectors of R in VT.
   570  						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
   571  							vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
   572  
   573  						// Multiply Q in A by left singular vectors of R in
   574  						// work[iu:], storing result in U.
   575  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
   576  							work[iu:], ldworku, 0, u, ldu)
   577  					} else {
   578  						// Insufficient workspace for a fast algorithm.
   579  						itau := 0
   580  						iwork := itau + n
   581  
   582  						// Compute A = Q * R, copying result to U.
   583  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   584  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   585  
   586  						// Generate Q in U.
   587  						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   588  
   589  						// Copy R to VT, zeroing out below it.
   590  						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   591  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
   592  
   593  						ie := itau
   594  						itauq := ie + n
   595  						itaup := itauq + n
   596  						iwork = itaup + n
   597  
   598  						// Bidiagonalize R in VT.
   599  						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   600  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   601  
   602  						// Multiply Q in U by left bidiagonalizing vectors in VT.
   603  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   604  							vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   605  
   606  						// Generate right bidiagonalizing vectors in VT.
   607  						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   608  							work[itaup:], work[iwork:], lwork-iwork)
   609  						iwork = ie + n
   610  
   611  						// Perform bidiagonal QR iteration, computing left singular
   612  						// vectors of A in U and computing right singular vectors
   613  						// of A in VT.
   614  						ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   615  							vt, ldvt, u, ldu, work, 1, work[iwork:])
   616  					}
   617  				}
   618  			} else if wantua {
   619  				if wantvn {
   620  					// Path 7
   621  					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
   622  						// Sufficient workspace for a fast algorithm.
   623  						ir := 0
   624  						var ldworkr int
   625  						if lwork >= wrkbl+lda*n {
   626  							ldworkr = lda
   627  						} else {
   628  							ldworkr = n
   629  						}
   630  						itau := ir + ldworkr*n
   631  						iwork := itau + n
   632  
   633  						// Compute A = Q*R, copying result to U.
   634  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   635  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   636  
   637  						// Copy R to work[ir:], zeroing out below it.
   638  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
   639  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
   640  
   641  						// Generate Q in U.
   642  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   643  						ie := itau
   644  						itauq := ie + n
   645  						itaup := itauq + n
   646  						iwork = itaup + n
   647  
   648  						// Bidiagonalize R in work[ir:].
   649  						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
   650  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   651  
   652  						// Generate left bidiagonalizing vectors in work[ir:].
   653  						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr,
   654  							work[itauq:], work[iwork:], lwork-iwork)
   655  						iwork = ie + n
   656  
   657  						// Perform bidiagonal QR iteration, computing left singular
   658  						// vectors of R in work[ir:].
   659  						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
   660  							work[ir:], ldworkr, work, 1, work[iwork:])
   661  
   662  						// Multiply Q in U by left singular vectors of R in
   663  						// work[ir:], storing result in A.
   664  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu,
   665  							work[ir:], ldworkr, 0, a, lda)
   666  
   667  						// Copy left singular vectors of A from A to U.
   668  						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
   669  					} else {
   670  						// Insufficient workspace for a fast algorithm.
   671  						itau := 0
   672  						iwork := itau + n
   673  
   674  						// Compute A = Q*R, copying result to U.
   675  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   676  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   677  
   678  						// Generate Q in U.
   679  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   680  						ie := itau
   681  						itauq := ie + n
   682  						itaup := itauq + n
   683  						iwork = itaup + n
   684  
   685  						// Zero out below R in A.
   686  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   687  
   688  						// Bidiagonalize R in A.
   689  						impl.Dgebrd(n, n, a, lda, s, work[ie:],
   690  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   691  
   692  						// Multiply Q in U by left bidiagonalizing vectors in A.
   693  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   694  							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   695  						iwork = ie + n
   696  
   697  						// Perform bidiagonal QR iteration, computing left
   698  						// singular vectors of A in U.
   699  						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:],
   700  							work, 1, u, ldu, work, 1, work[iwork:])
   701  					}
   702  				} else if wantvo {
   703  					// Path 8.
   704  					panic(noSVDO)
   705  				} else if wantvas {
   706  					// Path 9.
   707  					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
   708  						// Sufficient workspace for a fast algorithm.
   709  						iu := 0
   710  						var ldworku int
   711  						if lwork >= wrkbl+lda*n {
   712  							ldworku = lda
   713  						} else {
   714  							ldworku = n
   715  						}
   716  						itau := iu + ldworku*n
   717  						iwork := itau + n
   718  
   719  						// Compute A = Q * R, copying result to U.
   720  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   721  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   722  
   723  						// Generate Q in U.
   724  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   725  
   726  						// Copy R to work[iu:], zeroing out below it.
   727  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
   728  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
   729  
   730  						ie = itau
   731  						itauq := ie + n
   732  						itaup := itauq + n
   733  						iwork = itaup + n
   734  
   735  						// Bidiagonalize R in work[iu:], copying result to VT.
   736  						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
   737  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   738  						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
   739  
   740  						// Generate left bidiagonalizing vectors in work[iu:].
   741  						impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku,
   742  							work[itauq:], work[iwork:], lwork-iwork)
   743  
   744  						// Generate right bidiagonalizing vectors in VT.
   745  						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   746  							work[itaup:], work[iwork:], lwork-iwork)
   747  						iwork = ie + n
   748  
   749  						// Perform bidiagonal QR iteration, computing left singular
   750  						// vectors of R in work[iu:] and computing right
   751  						// singular vectors of R in VT.
   752  						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
   753  							vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
   754  
   755  						// Multiply Q in U by left singular vectors of R in
   756  						// work[iu:], storing result in A.
   757  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1,
   758  							u, ldu, work[iu:], ldworku, 0, a, lda)
   759  
   760  						// Copy left singular vectors of A from A to U.
   761  						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
   762  
   763  						/*
   764  							// Bidiagonalize R in VT.
   765  							impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   766  								work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   767  
   768  							// Multiply Q in U by left bidiagonalizing vectors in VT.
   769  							impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
   770  								m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   771  
   772  							// Generate right bidiagonalizing vectors in VT.
   773  							impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   774  								work[itaup:], work[iwork:], lwork-iwork)
   775  							iwork = ie + n
   776  
   777  							// Perform bidiagonal QR iteration, computing left singular
   778  							// vectors of A in U and computing right singular vectors
   779  							// of A in VT.
   780  							ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   781  								vt, ldvt, u, ldu, work, 1, work[iwork:])
   782  						*/
   783  					} else {
   784  						// Insufficient workspace for a fast algorithm.
   785  						itau := 0
   786  						iwork := itau + n
   787  
   788  						// Compute A = Q*R, copying result to U.
   789  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   790  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   791  
   792  						// Generate Q in U.
   793  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   794  
   795  						// Copy R from A to VT, zeroing out below it.
   796  						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   797  						if n > 1 {
   798  							impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
   799  						}
   800  
   801  						ie := itau
   802  						itauq := ie + n
   803  						itaup := itauq + n
   804  						iwork = itaup + n
   805  
   806  						// Bidiagonalize R in VT.
   807  						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   808  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   809  
   810  						// Multiply Q in U by left bidiagonalizing vectors in VT.
   811  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
   812  							m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   813  
   814  						// Generate right bidiagonizing vectors in VT.
   815  						impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt,
   816  							work[itaup:], work[iwork:], lwork-iwork)
   817  						iwork = ie + n
   818  
   819  						// Perform bidiagonal QR iteration, computing left singular
   820  						// vectors of A in U and computing right singular vectors
   821  						// of A in VT.
   822  						ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   823  							vt, ldvt, u, ldu, work, 1, work[iwork:])
   824  					}
   825  				}
   826  			}
   827  		} else {
   828  			// Path 10.
   829  			// M at least N, but not much larger.
   830  			ie = 0
   831  			itauq := ie + n
   832  			itaup := itauq + n
   833  			iwork := itaup + n
   834  
   835  			// Bidiagonalize A.
   836  			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:],
   837  				work[itaup:], work[iwork:], lwork-iwork)
   838  			if wantuas {
   839  				// Left singular vectors are desired in U. Copy result to U and
   840  				// generate left biadiagonalizing vectors in U.
   841  				impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   842  				var ncu int
   843  				if wantus {
   844  					ncu = n
   845  				}
   846  				if wantua {
   847  					ncu = m
   848  				}
   849  				impl.Dorgbr(lapack.GenerateQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
   850  			}
   851  			if wantvas {
   852  				// Right singular vectors are desired in VT. Copy result to VT and
   853  				// generate left biadiagonalizing vectors in VT.
   854  				impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   855  				impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
   856  			}
   857  			if wantuo {
   858  				panic(noSVDO)
   859  			}
   860  			if wantvo {
   861  				panic(noSVDO)
   862  			}
   863  			iwork = ie + n
   864  			var nru, ncvt int
   865  			if wantuas || wantuo {
   866  				nru = m
   867  			}
   868  			if wantun {
   869  				nru = 0
   870  			}
   871  			if wantvas || wantvo {
   872  				ncvt = n
   873  			}
   874  			if wantvn {
   875  				ncvt = 0
   876  			}
   877  			if !wantuo && !wantvo {
   878  				// Perform bidiagonal QR iteration, if desired, computing left
   879  				// singular vectors in U and right singular vectors in VT.
   880  				ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:],
   881  					vt, ldvt, u, ldu, work, 1, work[iwork:])
   882  			} else {
   883  				// There will be two branches when the implementation is complete.
   884  				panic(noSVDO)
   885  			}
   886  		}
   887  	} else {
   888  		// A has more columns than rows. If A has sufficiently more columns than
   889  		// rows, first reduce using the LQ decomposition.
   890  		if n >= mnthr {
   891  			// n >> m.
   892  			if wantvn {
   893  				// Path 1t.
   894  				itau := 0
   895  				iwork := itau + m
   896  
   897  				// Compute A = L*Q.
   898  				impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   899  
   900  				// Zero out above L.
   901  				impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
   902  				ie := 0
   903  				itauq := ie + m
   904  				itaup := itauq + m
   905  				iwork = itaup + m
   906  
   907  				// Bidiagonalize L in A.
   908  				impl.Dgebrd(m, m, a, lda, s, work[ie:itauq],
   909  					work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork)
   910  				if wantuo || wantuas {
   911  					impl.Dorgbr(lapack.GenerateQ, m, m, m, a, lda,
   912  						work[itauq:], work[iwork:], lwork-iwork)
   913  				}
   914  				iwork = ie + m
   915  				nru := 0
   916  				if wantuo || wantuas {
   917  					nru = m
   918  				}
   919  
   920  				// Perform bidiagonal QR iteration, computing left singular vectors
   921  				// of A in A if desired.
   922  				ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:],
   923  					work, 1, a, lda, work, 1, work[iwork:])
   924  
   925  				// If left singular vectors desired in U, copy them there.
   926  				if wantuas {
   927  					impl.Dlacpy(blas.All, m, m, a, lda, u, ldu)
   928  				}
   929  			} else if wantvo && wantun {
   930  				// Path 2t.
   931  				panic(noSVDO)
   932  			} else if wantvo && wantuas {
   933  				// Path 3t.
   934  				panic(noSVDO)
   935  			} else if wantvs {
   936  				if wantun {
   937  					// Path 4t.
   938  					if lwork >= m*m+max(4*m, bdspac) {
   939  						// Sufficient workspace for a fast algorithm.
   940  						ir := 0
   941  						var ldworkr int
   942  						if lwork >= wrkbl+lda*m {
   943  							ldworkr = lda
   944  						} else {
   945  							ldworkr = m
   946  						}
   947  						itau := ir + ldworkr*m
   948  						iwork := itau + m
   949  
   950  						// Compute A = L*Q.
   951  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   952  
   953  						// Copy L to work[ir:], zeroing out above it.
   954  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
   955  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
   956  
   957  						// Generate Q in A.
   958  						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
   959  						ie := itau
   960  						itauq := ie + m
   961  						itaup := itauq + m
   962  						iwork = itaup + m
   963  
   964  						// Bidiagonalize L in work[ir:].
   965  						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
   966  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   967  
   968  						// Generate right vectors bidiagonalizing L in work[ir:].
   969  						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr,
   970  							work[itaup:], work[iwork:], lwork-iwork)
   971  						iwork = ie + m
   972  
   973  						// Perform bidiagonal QR iteration, computing right singular
   974  						// vectors of L in work[ir:].
   975  						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
   976  							work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
   977  
   978  						// Multiply right singular vectors of L in work[ir:] by
   979  						// Q in A, storing result in VT.
   980  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
   981  							work[ir:], ldworkr, a, lda, 0, vt, ldvt)
   982  					} else {
   983  						// Insufficient workspace for a fast algorithm.
   984  						itau := 0
   985  						iwork := itau + m
   986  
   987  						// Compute A = L*Q.
   988  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   989  
   990  						// Copy result to VT.
   991  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
   992  
   993  						// Generate Q in VT.
   994  						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
   995  						ie := itau
   996  						itauq := ie + m
   997  						itaup := itauq + m
   998  						iwork = itaup + m
   999  
  1000  						// Zero out above L in A.
  1001  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
  1002  
  1003  						// Bidiagonalize L in A.
  1004  						impl.Dgebrd(m, m, a, lda, s, work[ie:],
  1005  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1006  
  1007  						// Multiply right vectors bidiagonalizing L by Q in VT.
  1008  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1009  							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1010  						iwork = ie + m
  1011  
  1012  						// Perform bidiagonal QR iteration, computing right
  1013  						// singular vectors of A in VT.
  1014  						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
  1015  							vt, ldvt, work, 1, work, 1, work[iwork:])
  1016  					}
  1017  				} else if wantuo {
  1018  					// Path 5t.
  1019  					panic(noSVDO)
  1020  				} else if wantuas {
  1021  					// Path 6t.
  1022  					if lwork >= m*m+max(4*m, bdspac) {
  1023  						// Sufficient workspace for a fast algorithm.
  1024  						iu := 0
  1025  						var ldworku int
  1026  						if lwork >= wrkbl+lda*m {
  1027  							ldworku = lda
  1028  						} else {
  1029  							ldworku = m
  1030  						}
  1031  						itau := iu + ldworku*m
  1032  						iwork := itau + m
  1033  
  1034  						// Compute A = L*Q.
  1035  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1036  
  1037  						// Copy L to work[iu:], zeroing out above it.
  1038  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
  1039  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
  1040  
  1041  						// Generate Q in A.
  1042  						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1043  						ie := itau
  1044  						itauq := ie + m
  1045  						itaup := itauq + m
  1046  						iwork = itaup + m
  1047  
  1048  						// Bidiagonalize L in work[iu:], copying result to U.
  1049  						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
  1050  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1051  						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
  1052  
  1053  						// Generate right bidiagionalizing vectors in work[iu:].
  1054  						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku,
  1055  							work[itaup:], work[iwork:], lwork-iwork)
  1056  
  1057  						// Generate left bidiagonalizing vectors in U.
  1058  						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1059  						iwork = ie + m
  1060  
  1061  						// Perform bidiagonal QR iteration, computing left singular
  1062  						// vectors of L in U and computing right singular vectors of
  1063  						// L in work[iu:].
  1064  						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
  1065  							work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
  1066  
  1067  						// Multiply right singular vectors of L in work[iu:] by
  1068  						// Q in A, storing result in VT.
  1069  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1070  							work[iu:], ldworku, a, lda, 0, vt, ldvt)
  1071  					} else {
  1072  						// Insufficient workspace for a fast algorithm.
  1073  						itau := 0
  1074  						iwork := itau + m
  1075  
  1076  						// Compute A = L*Q, copying result to VT.
  1077  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1078  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1079  
  1080  						// Generate Q in VT.
  1081  						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1082  
  1083  						// Copy L to U, zeroing out above it.
  1084  						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1085  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
  1086  
  1087  						ie := itau
  1088  						itauq := ie + m
  1089  						itaup := itauq + m
  1090  						iwork = itaup + m
  1091  
  1092  						// Bidiagonalize L in U.
  1093  						impl.Dgebrd(m, m, u, ldu, s, work[ie:],
  1094  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1095  
  1096  						// Multiply right bidiagonalizing vectors in U by Q in VT.
  1097  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1098  							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1099  
  1100  						// Generate left bidiagonalizing vectors in U.
  1101  						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1102  						iwork = ie + m
  1103  
  1104  						// Perform bidiagonal QR iteration, computing left singular
  1105  						// vectors of A in U and computing right singular vectors
  1106  						// of A in VT.
  1107  						ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
  1108  							u, ldu, work, 1, work[iwork:])
  1109  					}
  1110  				}
  1111  			} else if wantva {
  1112  				if wantun {
  1113  					// Path 7t.
  1114  					if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
  1115  						// Sufficient workspace for a fast algorithm.
  1116  						ir := 0
  1117  						var ldworkr int
  1118  						if lwork >= wrkbl+lda*m {
  1119  							ldworkr = lda
  1120  						} else {
  1121  							ldworkr = m
  1122  						}
  1123  						itau := ir + ldworkr*m
  1124  						iwork := itau + m
  1125  
  1126  						// Compute A = L*Q, copying result to VT.
  1127  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1128  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1129  
  1130  						// Copy L to work[ir:], zeroing out above it.
  1131  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
  1132  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
  1133  
  1134  						// Generate Q in VT.
  1135  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1136  
  1137  						ie := itau
  1138  						itauq := ie + m
  1139  						itaup := itauq + m
  1140  						iwork = itaup + m
  1141  
  1142  						// Bidiagonalize L in work[ir:].
  1143  						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
  1144  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1145  
  1146  						// Generate right bidiagonalizing vectors in work[ir:].
  1147  						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr,
  1148  							work[itaup:], work[iwork:], lwork-iwork)
  1149  						iwork = ie + m
  1150  
  1151  						// Perform bidiagonal QR iteration, computing right
  1152  						// singular vectors of L in work[ir:].
  1153  						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
  1154  							work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
  1155  
  1156  						// Multiply right singular vectors of L in work[ir:] by
  1157  						// Q in VT, storing result in A.
  1158  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1159  							work[ir:], ldworkr, vt, ldvt, 0, a, lda)
  1160  
  1161  						// Copy right singular vectors of A from A to VT.
  1162  						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
  1163  					} else {
  1164  						// Insufficient workspace for a fast algorithm.
  1165  						itau := 0
  1166  						iwork := itau + m
  1167  						// Compute A = L * Q, copying result to VT.
  1168  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1169  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1170  
  1171  						// Generate Q in VT.
  1172  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1173  
  1174  						ie := itau
  1175  						itauq := ie + m
  1176  						itaup := itauq + m
  1177  						iwork = itaup + m
  1178  
  1179  						// Zero out above L in A.
  1180  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
  1181  
  1182  						// Bidiagonalize L in A.
  1183  						impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:],
  1184  							work[itaup:], work[iwork:], lwork-iwork)
  1185  
  1186  						// Multiply right bidiagonalizing vectors in A by Q in VT.
  1187  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1188  							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1189  						iwork = ie + m
  1190  
  1191  						// Perform bidiagonal QR iteration, computing right singular
  1192  						// vectors of A in VT.
  1193  						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
  1194  							vt, ldvt, work, 1, work, 1, work[iwork:])
  1195  					}
  1196  				} else if wantuo {
  1197  					panic(noSVDO)
  1198  				} else if wantuas {
  1199  					// Path 9t.
  1200  					if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
  1201  						// Sufficient workspace for a fast algorithm.
  1202  						iu := 0
  1203  
  1204  						var ldworku int
  1205  						if lwork >= wrkbl+lda*m {
  1206  							ldworku = lda
  1207  						} else {
  1208  							ldworku = m
  1209  						}
  1210  						itau := iu + ldworku*m
  1211  						iwork := itau + m
  1212  
  1213  						// Generate A = L * Q copying result to VT.
  1214  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1215  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1216  
  1217  						// Generate Q in VT.
  1218  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1219  
  1220  						// Copy L to work[iu:], zeroing out above it.
  1221  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
  1222  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
  1223  						ie = itau
  1224  						itauq := ie + m
  1225  						itaup := itauq + m
  1226  						iwork = itaup + m
  1227  
  1228  						// Bidiagonalize L in work[iu:], copying result to U.
  1229  						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
  1230  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1231  						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
  1232  
  1233  						// Generate right bidiagonalizing vectors in work[iu:].
  1234  						impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku,
  1235  							work[itaup:], work[iwork:], lwork-iwork)
  1236  
  1237  						// Generate left bidiagonalizing vectors in U.
  1238  						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1239  						iwork = ie + m
  1240  
  1241  						// Perform bidiagonal QR iteration, computing left singular
  1242  						// vectors of L in U and computing right singular vectors
  1243  						// of L in work[iu:].
  1244  						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
  1245  							work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
  1246  
  1247  						// Multiply right singular vectors of L in work[iu:]
  1248  						// Q in VT, storing result in A.
  1249  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1250  							work[iu:], ldworku, vt, ldvt, 0, a, lda)
  1251  
  1252  						// Copy right singular vectors of A from A to VT.
  1253  						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
  1254  					} else {
  1255  						// Insufficient workspace for a fast algorithm.
  1256  						itau := 0
  1257  						iwork := itau + m
  1258  
  1259  						// Compute A = L * Q, copying result to VT.
  1260  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1261  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1262  
  1263  						// Generate Q in VT.
  1264  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1265  
  1266  						// Copy L to U, zeroing out above it.
  1267  						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1268  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
  1269  
  1270  						ie = itau
  1271  						itauq := ie + m
  1272  						itaup := itauq + m
  1273  						iwork = itaup + m
  1274  
  1275  						// Bidiagonalize L in U.
  1276  						impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
  1277  							work[itaup:], work[iwork:], lwork-iwork)
  1278  
  1279  						// Multiply right bidiagonalizing vectors in U by Q in VT.
  1280  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1281  							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1282  
  1283  						// Generate left bidiagonalizing vectors in U.
  1284  						impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1285  						iwork = ie + m
  1286  
  1287  						// Perform bidiagonal QR iteration, computing left singular
  1288  						// vectors of A in U and computing right singular vectors
  1289  						// of A in VT.
  1290  						ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
  1291  							vt, ldvt, u, ldu, work, 1, work[iwork:])
  1292  					}
  1293  				}
  1294  			}
  1295  		} else {
  1296  			// Path 10t.
  1297  			// N at least M, but not much larger.
  1298  			ie = 0
  1299  			itauq := ie + m
  1300  			itaup := itauq + m
  1301  			iwork := itaup + m
  1302  
  1303  			// Bidiagonalize A.
  1304  			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1305  			if wantuas {
  1306  				// If left singular vectors desired in U, copy result to U and
  1307  				// generate left bidiagonalizing vectors in U.
  1308  				impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1309  				impl.Dorgbr(lapack.GenerateQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1310  			}
  1311  			if wantvas {
  1312  				// If right singular vectors desired in VT, copy result to VT
  1313  				// and generate right bidiagonalizing vectors in VT.
  1314  				impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1315  				var nrvt int
  1316  				if wantva {
  1317  					nrvt = n
  1318  				} else {
  1319  					nrvt = m
  1320  				}
  1321  				impl.Dorgbr(lapack.GeneratePT, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
  1322  			}
  1323  			if wantuo {
  1324  				panic(noSVDO)
  1325  			}
  1326  			if wantvo {
  1327  				panic(noSVDO)
  1328  			}
  1329  			iwork = ie + m
  1330  			var nru, ncvt int
  1331  			if wantuas || wantuo {
  1332  				nru = m
  1333  			}
  1334  			if wantvas || wantvo {
  1335  				ncvt = n
  1336  			}
  1337  			if !wantuo && !wantvo {
  1338  				// Perform bidiagonal QR iteration, if desired, computing left
  1339  				// singular vectors in U and computing right singular vectors in
  1340  				// VT.
  1341  				ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
  1342  					vt, ldvt, u, ldu, work, 1, work[iwork:])
  1343  			} else {
  1344  				// There will be two branches when the implementation is complete.
  1345  				panic(noSVDO)
  1346  			}
  1347  		}
  1348  	}
  1349  	if !ok {
  1350  		if ie > 1 {
  1351  			for i := 0; i < minmn-1; i++ {
  1352  				work[i+1] = work[i+ie]
  1353  			}
  1354  		}
  1355  		if ie < 1 {
  1356  			for i := minmn - 2; i >= 0; i-- {
  1357  				work[i+1] = work[i+ie]
  1358  			}
  1359  		}
  1360  	}
  1361  	// Undo scaling if necessary.
  1362  	if iscl {
  1363  		if anrm > bignum {
  1364  			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn, s, minmn)
  1365  		}
  1366  		if !ok && anrm > bignum {
  1367  			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn-1, work[1:], minmn)
  1368  		}
  1369  		if anrm < smlnum {
  1370  			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn, s, minmn)
  1371  		}
  1372  		if !ok && anrm < smlnum {
  1373  			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn-1, work[1:], minmn)
  1374  		}
  1375  	}
  1376  	work[0] = float64(maxwrk)
  1377  	return ok
  1378  }