github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dgesvd.go (about)

     1  // Copyright ©2015 The gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package native
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/gonum/blas"
    11  	"github.com/gonum/blas/blas64"
    12  	"github.com/gonum/lapack"
    13  )
    14  
    15  const noSVDO = "dgesvd: not coded for overwrite"
    16  
    17  // Dgesvd computes the singular value decomposition of the input matrix A.
    18  //
    19  // The singular value decomposition is
    20  //  A = U * Sigma * V^T
    21  // where Sigma is an m×n diagonal matrix containing the singular values of A,
    22  // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first
    23  // min(m,n) columns of U and V are the left and right singular vectors of A
    24  // respectively.
    25  //
    26  // jobU and jobVT are options for computing the singular vectors. The behavior
    27  // is as follows
    28  //  jobU == lapack.SVDAll       All m columns of U are returned in u
    29  //  jobU == lapack.SVDInPlace   The first min(m,n) columns are returned in u
    30  //  jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a
    31  //  jobU == lapack.SVDNone      The columns of U are not computed.
    32  // The behavior is the same for jobVT and the rows of V^T. At most one of jobU
    33  // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise.
    34  //
    35  // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd
    36  // the data is overwritten. On exit, A contains the appropriate singular vectors
    37  // if either job is lapack.SVDOverwrite.
    38  //
    39  // s is a slice of length at least min(m,n) and on exit contains the singular
    40  // values in decreasing order.
    41  //
    42  // u contains the left singular vectors on exit, stored column-wise. If
    43  // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is
    44  // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is
    45  // not used.
    46  //
    47  // vt contains the left singular vectors on exit, stored row-wise. If
    48  // jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDInPlace vt is
    49  // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is
    50  // not used.
    51  //
    52  // work is a slice for storing temporary memory, and lwork is the usable size of
    53  // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)).
    54  // If lwork == -1, instead of performing Dgesvd, the optimal work length will be
    55  // stored into work[0]. Dgesvd will panic if the working memory has insufficient
    56  // storage.
    57  //
    58  // Dgesvd returns whether the decomposition successfully completed.
    59  func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) {
    60  	minmn := min(m, n)
    61  	checkMatrix(m, n, a, lda)
    62  	if jobU == lapack.SVDAll {
    63  		checkMatrix(m, m, u, ldu)
    64  	} else if jobU == lapack.SVDInPlace {
    65  		checkMatrix(m, minmn, u, ldu)
    66  	}
    67  	if jobVT == lapack.SVDAll {
    68  		checkMatrix(n, n, vt, ldvt)
    69  	} else if jobVT == lapack.SVDInPlace {
    70  		checkMatrix(minmn, n, vt, ldvt)
    71  	}
    72  	if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite {
    73  		panic("lapack: both jobU and jobVT are lapack.SVDOverwrite")
    74  	}
    75  	if len(s) < minmn {
    76  		panic(badS)
    77  	}
    78  	if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite {
    79  		panic(noSVDO)
    80  	}
    81  	if m == 0 || n == 0 {
    82  		return true
    83  	}
    84  
    85  	wantua := jobU == lapack.SVDAll
    86  	wantus := jobU == lapack.SVDInPlace
    87  	wantuas := wantua || wantus
    88  	wantuo := jobU == lapack.SVDOverwrite
    89  	wantun := jobU == lapack.None
    90  
    91  	wantva := jobVT == lapack.SVDAll
    92  	wantvs := jobVT == lapack.SVDInPlace
    93  	wantvas := wantva || wantvs
    94  	wantvo := jobVT == lapack.SVDOverwrite
    95  	wantvn := jobVT == lapack.None
    96  
    97  	bi := blas64.Implementation()
    98  	var mnthr int
    99  
   100  	// Compute optimal space for subroutines.
   101  	maxwrk := 1
   102  	opts := string(jobU) + string(jobVT)
   103  	var wrkbl, bdspac int
   104  	if m >= n {
   105  		mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
   106  		bdspac = 5 * n
   107  		impl.Dgeqrf(m, n, a, lda, nil, work, -1)
   108  		lwork_dgeqrf := int(work[0])
   109  		impl.Dorgqr(m, n, n, a, lda, nil, work, -1)
   110  		lwork_dorgqr_n := int(work[0])
   111  		impl.Dorgqr(m, m, n, a, lda, nil, work, -1)
   112  		lwork_dorgqr_m := int(work[0])
   113  		impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1)
   114  		lwork_dgebrd := int(work[0])
   115  		impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, nil, work, -1)
   116  		lwork_dorgbr_p := int(work[0])
   117  		impl.Dorgbr(lapack.ApplyQ, n, n, n, a, lda, nil, work, -1)
   118  		lwork_dorgbr_q := int(work[0])
   119  
   120  		if m >= mnthr {
   121  			// m >> n
   122  			if wantun {
   123  				// Path 1
   124  				maxwrk = n + lwork_dgeqrf
   125  				maxwrk = max(maxwrk, 3*n+lwork_dgebrd)
   126  				if wantvo || wantvas {
   127  					maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
   128  				}
   129  				maxwrk = max(maxwrk, bdspac)
   130  			} else if wantuo && wantvn {
   131  				// Path 2
   132  				wrkbl = n + lwork_dgeqrf
   133  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   134  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   135  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   136  				wrkbl = max(wrkbl, bdspac)
   137  				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
   138  			} else if wantuo && wantvs {
   139  				// Path 3
   140  				wrkbl = n + lwork_dgeqrf
   141  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   142  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   143  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   144  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   145  				wrkbl = max(wrkbl, bdspac)
   146  				maxwrk = max(n*n+wrkbl, n*n+m*n+n)
   147  			} else if wantus && wantvn {
   148  				// Path 4
   149  				wrkbl = n + lwork_dgeqrf
   150  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   151  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   152  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   153  				wrkbl = max(wrkbl, bdspac)
   154  				maxwrk = n*n + wrkbl
   155  			} else if wantus && wantvo {
   156  				// Path 5
   157  				wrkbl = n + lwork_dgeqrf
   158  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   159  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   160  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   161  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   162  				wrkbl = max(wrkbl, bdspac)
   163  				maxwrk = 2*n*n + wrkbl
   164  			} else if wantus && wantvas {
   165  				// Path 6
   166  				wrkbl = n + lwork_dgeqrf
   167  				wrkbl = max(wrkbl, n+lwork_dorgqr_n)
   168  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   169  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   170  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   171  				wrkbl = max(wrkbl, bdspac)
   172  				maxwrk = n*n + wrkbl
   173  			} else if wantua && wantvn {
   174  				// Path 7
   175  				wrkbl = n + lwork_dgeqrf
   176  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   177  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   178  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   179  				wrkbl = max(wrkbl, bdspac)
   180  				maxwrk = n*n + wrkbl
   181  			} else if wantua && wantvo {
   182  				// Path 8
   183  				wrkbl = n + lwork_dgeqrf
   184  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   185  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   186  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   187  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   188  				wrkbl = max(wrkbl, bdspac)
   189  				maxwrk = 2*n*n + wrkbl
   190  			} else if wantua && wantvas {
   191  				// Path 9
   192  				wrkbl = n + lwork_dgeqrf
   193  				wrkbl = max(wrkbl, n+lwork_dorgqr_m)
   194  				wrkbl = max(wrkbl, 3*n+lwork_dgebrd)
   195  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q)
   196  				wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p)
   197  				wrkbl = max(wrkbl, bdspac)
   198  				maxwrk = n*n + wrkbl
   199  			}
   200  		} else {
   201  			// Path 10: m > n
   202  			impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
   203  			lwork_dgebrd := int(work[0])
   204  			maxwrk = 3*n + lwork_dgebrd
   205  			if wantus || wantuo {
   206  				impl.Dorgbr(lapack.ApplyQ, m, n, n, a, lda, nil, work, -1)
   207  				lwork_dorgbr_q = int(work[0])
   208  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
   209  			}
   210  			if wantua {
   211  				impl.Dorgbr(lapack.ApplyQ, m, m, n, a, lda, nil, work, -1)
   212  				lwork_dorgbr_q := int(work[0])
   213  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q)
   214  			}
   215  			if !wantvn {
   216  				maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p)
   217  			}
   218  			maxwrk = max(maxwrk, bdspac)
   219  		}
   220  	} else {
   221  		mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0)
   222  
   223  		bdspac = 5 * m
   224  		impl.Dgelqf(m, n, a, lda, nil, work, -1)
   225  		lwork_dgelqf := int(work[0])
   226  		impl.Dorglq(n, n, m, nil, n, nil, work, -1)
   227  		lwork_dorglq_n := int(work[0])
   228  		impl.Dorglq(m, n, m, a, lda, nil, work, -1)
   229  		lwork_dorglq_m := int(work[0])
   230  		impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1)
   231  		lwork_dgebrd := int(work[0])
   232  		impl.Dorgbr(lapack.ApplyP, m, m, m, a, n, nil, work, -1)
   233  		lwork_dorgbr_p := int(work[0])
   234  		impl.Dorgbr(lapack.ApplyQ, m, m, m, a, n, nil, work, -1)
   235  		lwork_dorgbr_q := int(work[0])
   236  		if n >= mnthr {
   237  			// n >> m
   238  			if wantvn {
   239  				// Path 1t
   240  				maxwrk = m + lwork_dgelqf
   241  				maxwrk = max(maxwrk, 3*m+lwork_dgebrd)
   242  				if wantuo || wantuas {
   243  					maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
   244  				}
   245  				maxwrk = max(maxwrk, bdspac)
   246  			} else if wantvo && wantun {
   247  				// Path 2t
   248  				wrkbl = m + lwork_dgelqf
   249  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   250  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   251  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   252  				wrkbl = max(wrkbl, bdspac)
   253  				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
   254  			} else if wantvo && wantuas {
   255  				// Path 3t
   256  				wrkbl = m + lwork_dgelqf
   257  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   258  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   259  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   260  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   261  				wrkbl = max(wrkbl, bdspac)
   262  				maxwrk = max(m*m+wrkbl, m*m+m*n+m)
   263  			} else if wantvs && wantun {
   264  				// Path 4t
   265  				wrkbl = m + lwork_dgelqf
   266  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   267  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   268  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   269  				wrkbl = max(wrkbl, bdspac)
   270  				maxwrk = m*m + wrkbl
   271  			} else if wantvs && wantuo {
   272  				// Path 5t
   273  				wrkbl = m + lwork_dgelqf
   274  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   275  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   276  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   277  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   278  				wrkbl = max(wrkbl, bdspac)
   279  				maxwrk = 2*m*m + wrkbl
   280  			} else if wantvs && wantuas {
   281  				// Path 6t
   282  				wrkbl = m + lwork_dgelqf
   283  				wrkbl = max(wrkbl, m+lwork_dorglq_m)
   284  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   285  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   286  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   287  				wrkbl = max(wrkbl, bdspac)
   288  				maxwrk = m*m + wrkbl
   289  			} else if wantva && wantun {
   290  				// Path 7t
   291  				wrkbl = m + lwork_dgelqf
   292  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   293  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   294  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   295  				wrkbl = max(wrkbl, bdspac)
   296  				maxwrk = m*m + wrkbl
   297  			} else if wantva && wantuo {
   298  				// Path 8t
   299  				wrkbl = m + lwork_dgelqf
   300  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   301  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   302  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   303  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   304  				wrkbl = max(wrkbl, bdspac)
   305  				maxwrk = 2*m*m + wrkbl
   306  			} else if wantva && wantuas {
   307  				// Path 9t
   308  				wrkbl = m + lwork_dgelqf
   309  				wrkbl = max(wrkbl, m+lwork_dorglq_n)
   310  				wrkbl = max(wrkbl, 3*m+lwork_dgebrd)
   311  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p)
   312  				wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q)
   313  				wrkbl = max(wrkbl, bdspac)
   314  				maxwrk = m*m + wrkbl
   315  			}
   316  		} else {
   317  			// Path 10t, n > m
   318  			impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1)
   319  			lwork_dgebrd = int(work[0])
   320  			maxwrk := 3*m + lwork_dgebrd
   321  			if wantvs || wantvo {
   322  				impl.Dorgbr(lapack.ApplyP, m, n, m, a, n, nil, work, -1)
   323  				lwork_dorgbr_p = int(work[0])
   324  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
   325  			}
   326  			if wantva {
   327  				impl.Dorgbr(lapack.ApplyP, n, n, m, a, n, nil, work, -1)
   328  				lwork_dorgbr_p = int(work[0])
   329  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p)
   330  			}
   331  			if !wantun {
   332  				maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q)
   333  			}
   334  			maxwrk = max(maxwrk, bdspac)
   335  		}
   336  	}
   337  
   338  	minWork := max(1, 5*minmn)
   339  	if !((wantun && m >= mnthr) || (wantvn && n >= mnthr)) {
   340  		minWork = max(minWork, 3*minmn+max(m, n))
   341  	}
   342  
   343  	if lwork != -1 {
   344  		if len(work) < lwork {
   345  			panic(badWork)
   346  		}
   347  		if lwork < minWork {
   348  			panic(badWork)
   349  		}
   350  	}
   351  	if m == 0 || n == 0 {
   352  		return true
   353  	}
   354  
   355  	maxwrk = max(maxwrk, minWork)
   356  	work[0] = float64(maxwrk)
   357  	if lwork == -1 {
   358  		return true
   359  	}
   360  
   361  	// Perform decomposition.
   362  	eps := dlamchE
   363  	smlnum := math.Sqrt(dlamchS) / eps
   364  	bignum := 1 / smlnum
   365  
   366  	// Scale A if max element outside range [smlnum, bignum].
   367  	anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil)
   368  	var iscl bool
   369  	if anrm > 0 && anrm < smlnum {
   370  		iscl = true
   371  		impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda)
   372  	} else if anrm > bignum {
   373  		iscl = true
   374  		impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda)
   375  	}
   376  
   377  	var ie int
   378  	if m >= n {
   379  		// If A has sufficiently more rows than columns, use the QR decomposition.
   380  		if m >= mnthr {
   381  			// m >> n
   382  			if wantun {
   383  				// Path 1.
   384  				itau := 0
   385  				iwork := itau + n
   386  
   387  				// Compute A = Q * R.
   388  				impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   389  
   390  				// Zero out below R.
   391  				impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   392  				ie = 0
   393  				itauq := ie + n
   394  				itaup := itauq + n
   395  				iwork = itaup + n
   396  				// Bidiagonalize R in A.
   397  				impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:],
   398  					work[itaup:], work[iwork:], lwork-iwork)
   399  				ncvt := 0
   400  				if wantvo || wantvas {
   401  					// Generate P^T.
   402  					impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, work[itaup:],
   403  						work[iwork:], lwork-iwork)
   404  					ncvt = n
   405  				}
   406  				iwork = ie + n
   407  
   408  				// Perform bidiagonal QR iteration computing right singular vectors
   409  				// of A in A if desired.
   410  				ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:],
   411  					a, lda, work, 1, work, 1, work[iwork:])
   412  
   413  				// If right singular vectors desired in VT, copy them there.
   414  				if wantvas {
   415  					impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt)
   416  				}
   417  			} else if wantuo && wantvn {
   418  				// Path 2
   419  				panic(noSVDO)
   420  			} else if wantuo && wantvas {
   421  				// Path 3
   422  				panic(noSVDO)
   423  			} else if wantus {
   424  				if wantvn {
   425  					// Path 4
   426  					if lwork >= n*n+max(4*n, bdspac) {
   427  						// Sufficient workspace for a fast algorithm.
   428  						ir := 0
   429  						var ldworkr int
   430  						if lwork >= wrkbl+lda*n {
   431  							ldworkr = lda
   432  						} else {
   433  							ldworkr = n
   434  						}
   435  						itau := ir + ldworkr*n
   436  						iwork := itau + n
   437  						// Compute A = Q * R.
   438  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   439  
   440  						// Copy R to work[ir:], zeroing out below it.
   441  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
   442  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
   443  
   444  						// Generate Q in A.
   445  						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   446  						ie := itau
   447  						itauq := ie + n
   448  						itaup := itauq + n
   449  						iwork = itaup + n
   450  
   451  						// Bidiagonalize R in work[ir:].
   452  						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
   453  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   454  
   455  						// Generate left vectors bidiagonalizing R in work[ir:].
   456  						impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
   457  							work[itauq:], work[iwork:], lwork-iwork)
   458  						iwork = ie + n
   459  
   460  						// Perform bidiagonal QR iteration, compuing left singular
   461  						// vectors of R in work[ir:].
   462  						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
   463  							work[ir:], ldworkr, work, 1, work[iwork:])
   464  
   465  						// Multiply Q in A by left singular vectors of R in
   466  						// work[ir:], storing result in U.
   467  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
   468  							work[ir:], ldworkr, 0, u, ldu)
   469  					} else {
   470  						// Insufficient workspace for a fast algorithm.
   471  						itau := 0
   472  						iwork := itau + n
   473  
   474  						// Compute A = Q*R, copying result to U.
   475  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   476  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   477  
   478  						// Generate Q in U.
   479  						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   480  						ie := itau
   481  						itauq := ie + n
   482  						itaup := itauq + n
   483  						iwork = itaup + n
   484  
   485  						// Zero out below R in A.
   486  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   487  
   488  						// Bidiagonalize R in A.
   489  						impl.Dgebrd(n, n, a, lda, s, work[ie:],
   490  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   491  
   492  						// Multiply Q in U by left vectors bidiagonalizing R.
   493  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   494  							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   495  						iwork = ie + n
   496  
   497  						// Perform bidiagonal QR iteration, computing left
   498  						// singular vectors of A in U.
   499  						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1,
   500  							u, ldu, work, 1, work[iwork:])
   501  					}
   502  				} else if wantvo {
   503  					// Path 5
   504  					panic(noSVDO)
   505  				} else if wantvas {
   506  					// Path 6
   507  					if lwork >= n*n+max(4*n, bdspac) {
   508  						// Sufficient workspace for a fast algorithm.
   509  						iu := 0
   510  						var ldworku int
   511  						if lwork >= wrkbl+lda*n {
   512  							ldworku = lda
   513  						} else {
   514  							ldworku = n
   515  						}
   516  						itau := iu + ldworku*n
   517  						iwork := itau + n
   518  
   519  						// Compute A = Q * R.
   520  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   521  						// Copy R to work[iu:], zeroing out below it.
   522  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
   523  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
   524  
   525  						// Generate Q in A.
   526  						impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   527  
   528  						ie := itau
   529  						itauq := ie + n
   530  						itaup := itauq + n
   531  						iwork = itaup + n
   532  
   533  						// Bidiagonalize R in work[iu:], copying result to VT.
   534  						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
   535  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   536  						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
   537  
   538  						// Generate left bidiagonalizing vectors in work[iu:].
   539  						impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
   540  							work[itauq:], work[iwork:], lwork-iwork)
   541  
   542  						// Generate right bidiagonalizing vectors in VT.
   543  						impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
   544  							work[itaup:], work[iwork:], lwork-iwork)
   545  						iwork = ie + n
   546  
   547  						// Perform bidiagonal QR iteration, computing left singular
   548  						// vectors of R in work[iu:], and computing right singular
   549  						// vectors of R in VT.
   550  						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
   551  							vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
   552  
   553  						// Multiply Q in A by left singular vectors of R in
   554  						// work[iu:], storing result in U.
   555  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda,
   556  							work[iu:], ldworku, 0, u, ldu)
   557  					} else {
   558  						// Insufficient workspace for a fast algorithm.
   559  						itau := 0
   560  						iwork := itau + n
   561  
   562  						// Compute A = Q * R, copying result to U.
   563  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   564  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   565  
   566  						// Generate Q in U.
   567  						impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   568  
   569  						// Copy R to VT, zeroing out below it.
   570  						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   571  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
   572  
   573  						ie := itau
   574  						itauq := ie + n
   575  						itaup := itauq + n
   576  						iwork = itaup + n
   577  
   578  						// Bidiagonalize R in VT.
   579  						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   580  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   581  
   582  						// Multiply Q in U by left bidiagonalizing vectors in VT.
   583  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   584  							vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   585  
   586  						// Generate right bidiagonalizing vectors in VT.
   587  						impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
   588  							work[itaup:], work[iwork:], lwork-iwork)
   589  						iwork = ie + n
   590  
   591  						// Perform bidiagonal QR iteration, computing left singular
   592  						// vectors of A in U and computing right singular vectors
   593  						// of A in VT.
   594  						ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   595  							vt, ldvt, u, ldu, work, 1, work[iwork:])
   596  					}
   597  				}
   598  			} else if wantua {
   599  				if wantvn {
   600  					// Path 7
   601  					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
   602  						// Sufficient workspace for a fast algorithm.
   603  						ir := 0
   604  						var ldworkr int
   605  						if lwork >= wrkbl+lda*n {
   606  							ldworkr = lda
   607  						} else {
   608  							ldworkr = n
   609  						}
   610  						itau := ir + ldworkr*n
   611  						iwork := itau + n
   612  
   613  						// Compute A = Q*R, copying result to U.
   614  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   615  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   616  
   617  						// Copy R to work[ir:], zeroing out below it.
   618  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr)
   619  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr)
   620  
   621  						// Generate Q in U.
   622  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   623  						ie := itau
   624  						itauq := ie + n
   625  						itaup := itauq + n
   626  						iwork = itaup + n
   627  
   628  						// Bidiagonalize R in work[ir:].
   629  						impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:],
   630  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   631  
   632  						// Generate left bidiagonalizing vectors in work[ir:].
   633  						impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr,
   634  							work[itauq:], work[iwork:], lwork-iwork)
   635  						iwork = ie + n
   636  
   637  						// Perform bidiagonal QR iteration, computing left singular
   638  						// vectors of R in work[ir:].
   639  						ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1,
   640  							work[ir:], ldworkr, work, 1, work[iwork:])
   641  
   642  						// Multiply Q in U by left singular vectors of R in
   643  						// work[ir:], storing result in A.
   644  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu,
   645  							work[ir:], ldworkr, 0, a, lda)
   646  
   647  						// Copy left singular vectors of A from A to U.
   648  						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
   649  					} else {
   650  						// Insufficient workspace for a fast algorithm.
   651  						itau := 0
   652  						iwork := itau + n
   653  
   654  						// Compute A = Q*R, copying result to U.
   655  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   656  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   657  
   658  						// Generate Q in U.
   659  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   660  						ie := itau
   661  						itauq := ie + n
   662  						itaup := itauq + n
   663  						iwork = itaup + n
   664  
   665  						// Zero out below R in A.
   666  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda)
   667  
   668  						// Bidiagonalize R in A.
   669  						impl.Dgebrd(n, n, a, lda, s, work[ie:],
   670  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   671  
   672  						// Multiply Q in U by left bidiagonalizing vectors in A.
   673  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n,
   674  							a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   675  						iwork = ie + n
   676  
   677  						// Perform bidiagonal QR iteration, computing left
   678  						// singular vectors of A in U.
   679  						ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:],
   680  							work, 1, u, ldu, work, 1, work[iwork:])
   681  					}
   682  				} else if wantvo {
   683  					// Path 8.
   684  					panic(noSVDO)
   685  				} else if wantvas {
   686  					// Path 9.
   687  					if lwork >= n*n+max(max(n+m, 4*n), bdspac) {
   688  						// Sufficient workspace for a fast algorithm.
   689  						iu := 0
   690  						var ldworku int
   691  						if lwork >= wrkbl+lda*n {
   692  							ldworku = lda
   693  						} else {
   694  							ldworku = n
   695  						}
   696  						itau := iu + ldworku*n
   697  						iwork := itau + n
   698  
   699  						// Compute A = Q * R, copying result to U.
   700  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   701  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   702  
   703  						// Generate Q in U.
   704  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   705  
   706  						// Copy R to work[iu:], zeroing out below it.
   707  						impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku)
   708  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku)
   709  
   710  						ie = itau
   711  						itauq := ie + n
   712  						itaup := itauq + n
   713  						iwork = itaup + n
   714  
   715  						// Bidiagonalize R in work[iu:], copying result to VT.
   716  						impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:],
   717  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   718  						impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt)
   719  
   720  						// Generate left bidiagonalizing vectors in work[iu:].
   721  						impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku,
   722  							work[itauq:], work[iwork:], lwork-iwork)
   723  
   724  						// Generate right bidiagonalizing vectors in VT.
   725  						impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
   726  							work[itaup:], work[iwork:], lwork-iwork)
   727  						iwork = ie + n
   728  
   729  						// Perform bidiagonal QR iteration, computing left singular
   730  						// vectors of R in work[iu:] and computing right
   731  						// singular vectors of R in VT.
   732  						ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:],
   733  							vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:])
   734  
   735  						// Multiply Q in U by left singular vectors of R in
   736  						// work[iu:], storing result in A.
   737  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1,
   738  							u, ldu, work[iu:], ldworku, 0, a, lda)
   739  
   740  						// Copy left singular vectors of A from A to U.
   741  						impl.Dlacpy(blas.All, m, n, a, lda, u, ldu)
   742  
   743  						/*
   744  							// Bidiagonalize R in VT.
   745  							impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   746  								work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   747  
   748  							// Multiply Q in U by left bidiagonalizing vectors in VT.
   749  							impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
   750  								m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   751  
   752  							// Generate right bidiagonalizing vectors in VT.
   753  							impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
   754  								work[itaup:], work[iwork:], lwork-iwork)
   755  							iwork = ie + n
   756  
   757  							// Perform bidiagonal QR iteration, computing left singular
   758  							// vectors of A in U and computing right singular vectors
   759  							// of A in VT.
   760  							ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   761  								vt, ldvt, u, ldu, work, 1, work[iwork:])
   762  						*/
   763  					} else {
   764  						// Insufficient workspace for a fast algorithm.
   765  						itau := 0
   766  						iwork := itau + n
   767  
   768  						// Compute A = Q*R, copying result to U.
   769  						impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   770  						impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   771  
   772  						// Generate Q in U.
   773  						impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork)
   774  
   775  						// Copy R from A to VT, zeroing out below it.
   776  						impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   777  						impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt)
   778  
   779  						ie := itau
   780  						itauq := ie + n
   781  						itaup := itauq + n
   782  						iwork = itaup + n
   783  
   784  						// Bidiagonalize R in VT.
   785  						impl.Dgebrd(n, n, vt, ldvt, s, work[ie:],
   786  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   787  
   788  						// Multiply Q in U by left bidiagonalizing vectors in VT.
   789  						impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans,
   790  							m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork)
   791  
   792  						// Generate right bidiagonizing vectors in VT.
   793  						impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt,
   794  							work[itaup:], work[iwork:], lwork-iwork)
   795  						iwork = ie + n
   796  
   797  						// Perform bidiagonal QR iteration, computing left singular
   798  						// vectors of A in U and computing right singular vectors
   799  						// of A in VT.
   800  						impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:],
   801  							vt, ldvt, u, ldu, work, 1, work[iwork:])
   802  					}
   803  				}
   804  			}
   805  		} else {
   806  			// Path 10.
   807  			// M at least N, but not much larger.
   808  			ie = 0
   809  			itauq := ie + n
   810  			itaup := itauq + n
   811  			iwork := itaup + n
   812  
   813  			// Bidiagonalize A.
   814  			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:],
   815  				work[itaup:], work[iwork:], lwork-iwork)
   816  			if wantuas {
   817  				// Left singular vectors are desired in U. Copy result to U and
   818  				// generate left biadiagonalizing vectors in U.
   819  				impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu)
   820  				var ncu int
   821  				if wantus {
   822  					ncu = n
   823  				}
   824  				if wantua {
   825  					ncu = m
   826  				}
   827  				impl.Dorgbr(lapack.ApplyQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
   828  			}
   829  			if wantvas {
   830  				// Right singular vectors are desired in VT. Copy result to VT and
   831  				// generate left biadiagonalizing vectors in VT.
   832  				impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt)
   833  				impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
   834  			}
   835  			if wantuo {
   836  				panic(noSVDO)
   837  			}
   838  			if wantvo {
   839  				panic(noSVDO)
   840  			}
   841  			iwork = ie + n
   842  			var nru, ncvt int
   843  			if wantuas || wantuo {
   844  				nru = m
   845  			}
   846  			if wantun {
   847  				nru = 0
   848  			}
   849  			if wantvas || wantvo {
   850  				ncvt = n
   851  			}
   852  			if wantvn {
   853  				ncvt = 0
   854  			}
   855  			if !wantuo && !wantvo {
   856  				// Perform bidiagonal QR iteration, if desired, computing left
   857  				// singular vectors in U and right singular vectors in VT.
   858  				ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:],
   859  					vt, ldvt, u, ldu, work, 1, work[iwork:])
   860  			} else {
   861  				// There will be two branches when the implementation is complete.
   862  				panic(noSVDO)
   863  			}
   864  		}
   865  	} else {
   866  		// A has more columns than rows. If A has sufficiently more columns than
   867  		// rows, first reduce using the LQ decomposition.
   868  		if n >= mnthr {
   869  			// n >> m.
   870  			if wantvn {
   871  				// Path 1t.
   872  				itau := 0
   873  				iwork := itau + m
   874  
   875  				// Compute A = L*Q.
   876  				impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   877  
   878  				// Zero out above L.
   879  				impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
   880  				ie := 0
   881  				itauq := ie + m
   882  				itaup := itauq + m
   883  				iwork = itaup + m
   884  
   885  				// Bidiagonalize L in A.
   886  				impl.Dgebrd(m, m, a, lda, s, work[ie:itauq],
   887  					work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork)
   888  				if wantuo || wantuas {
   889  					impl.Dorgbr(lapack.ApplyQ, m, m, m, a, lda,
   890  						work[itauq:], work[iwork:], lwork-iwork)
   891  				}
   892  				iwork = ie + m
   893  				nru := 0
   894  				if wantuo || wantuas {
   895  					nru = m
   896  				}
   897  
   898  				// Perform bidiagonal QR iteration, computing left singular vectors
   899  				// of A in A if desired.
   900  				ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:],
   901  					work, 1, a, lda, work, 1, work[iwork:])
   902  
   903  				// If left singular vectors desired in U, copy them there.
   904  				if wantuas {
   905  					impl.Dlacpy(blas.All, m, m, a, lda, u, ldu)
   906  				}
   907  			} else if wantvo && wantun {
   908  				// Path 2t.
   909  				panic(noSVDO)
   910  			} else if wantvo && wantuas {
   911  				// Path 3t.
   912  				panic(noSVDO)
   913  			} else if wantvs {
   914  				if wantun {
   915  					// Path 4t.
   916  					if lwork >= m*m+max(4*m, bdspac) {
   917  						// Sufficient workspace for a fast algorithm.
   918  						ir := 0
   919  						var ldworkr int
   920  						if lwork >= wrkbl+lda*m {
   921  							ldworkr = lda
   922  						} else {
   923  							ldworkr = m
   924  						}
   925  						itau := ir + ldworkr*m
   926  						iwork := itau + m
   927  
   928  						// Compute A = L*Q.
   929  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   930  
   931  						// Copy L to work[ir:], zeroing out above it.
   932  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
   933  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
   934  
   935  						// Generate Q in A.
   936  						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
   937  						ie := itau
   938  						itauq := ie + m
   939  						itaup := itauq + m
   940  						iwork = itaup + m
   941  
   942  						// Bidiagonalize L in work[ir:].
   943  						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
   944  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   945  
   946  						// Generate right vectors bidiagonalizing L in work[ir:].
   947  						impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
   948  							work[itaup:], work[iwork:], lwork-iwork)
   949  						iwork = ie + m
   950  
   951  						// Perform bidiagonal QR iteration, computing right singular
   952  						// vectors of L in work[ir:].
   953  						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
   954  							work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
   955  
   956  						// Multiply right singular vectors of L in work[ir:] by
   957  						// Q in A, storing result in VT.
   958  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
   959  							work[ir:], ldworkr, a, lda, 0, vt, ldvt)
   960  					} else {
   961  						// Insufficient workspace for a fast algorithm.
   962  						itau := 0
   963  						iwork := itau + m
   964  
   965  						// Compute A = L*Q.
   966  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
   967  
   968  						// Copy result to VT.
   969  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
   970  
   971  						// Generate Q in VT.
   972  						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
   973  						ie := itau
   974  						itauq := ie + m
   975  						itaup := itauq + m
   976  						iwork = itaup + m
   977  
   978  						// Zero out above L in A.
   979  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
   980  
   981  						// Bidiagonalize L in A.
   982  						impl.Dgebrd(m, m, a, lda, s, work[ie:],
   983  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
   984  
   985  						// Multiply right vectors bidiagonalizing L by Q in VT.
   986  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
   987  							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
   988  						iwork = ie + m
   989  
   990  						// Perform bidiagonal QR iteration, computing right
   991  						// singular vectors of A in VT.
   992  						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
   993  							vt, ldvt, work, 1, work, 1, work[iwork:])
   994  					}
   995  				} else if wantuo {
   996  					// Path 5t.
   997  					panic(noSVDO)
   998  				} else if wantuas {
   999  					// Path 6t.
  1000  					if lwork >= m*m+max(4*m, bdspac) {
  1001  						// Sufficient workspace for a fast algorithm.
  1002  						iu := 0
  1003  						var ldworku int
  1004  						if lwork >= wrkbl+lda*m {
  1005  							ldworku = lda
  1006  						} else {
  1007  							ldworku = m
  1008  						}
  1009  						itau := iu + ldworku*m
  1010  						iwork := itau + m
  1011  
  1012  						// Compute A = L*Q.
  1013  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1014  
  1015  						// Copy L to work[iu:], zeroing out above it.
  1016  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
  1017  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
  1018  
  1019  						// Generate Q in A.
  1020  						impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1021  						ie := itau
  1022  						itauq := ie + m
  1023  						itaup := itauq + m
  1024  						iwork = itaup + m
  1025  
  1026  						// Bidiagonalize L in work[iu:], copying result to U.
  1027  						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
  1028  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1029  						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
  1030  
  1031  						// Generate right bidiagionalizing vectors in work[iu:].
  1032  						impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
  1033  							work[itaup:], work[iwork:], lwork-iwork)
  1034  
  1035  						// Generate left bidiagonalizing vectors in U.
  1036  						impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1037  						iwork = ie + m
  1038  
  1039  						// Perform bidiagonal QR iteration, computing left singular
  1040  						// vectors of L in U and computing right singular vectors of
  1041  						// L in work[iu:].
  1042  						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
  1043  							work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
  1044  
  1045  						// Multiply right singular vectors of L in work[iu:] by
  1046  						// Q in A, storing result in VT.
  1047  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1048  							work[iu:], ldworku, a, lda, 0, vt, ldvt)
  1049  					} else {
  1050  						// Insufficient workspace for a fast algorithm.
  1051  						itau := 0
  1052  						iwork := itau + m
  1053  
  1054  						// Compute A = L*Q, copying result to VT.
  1055  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1056  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1057  
  1058  						// Generate Q in VT.
  1059  						impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1060  
  1061  						// Copy L to U, zeroing out above it.
  1062  						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1063  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
  1064  
  1065  						ie := itau
  1066  						itauq := ie + m
  1067  						itaup := itauq + m
  1068  						iwork = itaup + m
  1069  
  1070  						// Bidiagonalize L in U.
  1071  						impl.Dgebrd(m, m, u, ldu, s, work[ie:],
  1072  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1073  
  1074  						// Multiply right bidiagonalizing vectors in U by Q in VT.
  1075  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1076  							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1077  
  1078  						// Generate left bidiagonalizing vectors in U.
  1079  						impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1080  						iwork = ie + m
  1081  
  1082  						// Perform bidiagonal QR iteration, computing left singular
  1083  						// vectors of A in U and computing right singular vectors
  1084  						// of A in VT.
  1085  						impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt,
  1086  							u, ldu, work, 1, work[iwork:])
  1087  					}
  1088  				}
  1089  			} else if wantva {
  1090  				if wantun {
  1091  					// Path 7t.
  1092  					if lwork >= m*m+max(max(n+m, 4*m), bdspac) {
  1093  						// Sufficient workspace for a fast algorithm.
  1094  						ir := 0
  1095  						var ldworkr int
  1096  						if lwork >= wrkbl+lda*m {
  1097  							ldworkr = lda
  1098  						} else {
  1099  							ldworkr = m
  1100  						}
  1101  						itau := ir + ldworkr*m
  1102  						iwork := itau + m
  1103  
  1104  						// Compute A = L*Q, copying result to VT.
  1105  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1106  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1107  
  1108  						// Copy L to work[ir:], zeroing out above it.
  1109  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr)
  1110  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr)
  1111  
  1112  						// Generate Q in VT.
  1113  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1114  
  1115  						ie := itau
  1116  						itauq := ie + m
  1117  						itaup := itauq + m
  1118  						iwork = itaup + m
  1119  
  1120  						// Bidiagonalize L in work[ir:].
  1121  						impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:],
  1122  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1123  
  1124  						// Generate right bidiagonalizing vectors in work[ir:].
  1125  						impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr,
  1126  							work[itaup:], work[iwork:], lwork-iwork)
  1127  						iwork = ie + m
  1128  
  1129  						// Perform bidiagonal QR iteration, computing right
  1130  						// singular vectors of L in work[ir:].
  1131  						ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:],
  1132  							work[ir:], ldworkr, work, 1, work, 1, work[iwork:])
  1133  
  1134  						// Multiply right singular vectors of L in work[ir:] by
  1135  						// Q in VT, storing result in A.
  1136  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1137  							work[ir:], ldworkr, vt, ldvt, 0, a, lda)
  1138  
  1139  						// Copy right singular vectors of A from A to VT.
  1140  						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
  1141  					} else {
  1142  						// Insufficient workspace for a fast algorithm.
  1143  						itau := 0
  1144  						iwork := itau + m
  1145  						// Compute A = L * Q, copying result to VT.
  1146  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1147  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1148  
  1149  						// Generate Q in VT.
  1150  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1151  
  1152  						ie := itau
  1153  						itauq := ie + m
  1154  						itaup := itauq + m
  1155  						iwork = itaup + m
  1156  
  1157  						// Zero out above L in A.
  1158  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda)
  1159  
  1160  						// Bidiagonalize L in A.
  1161  						impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:],
  1162  							work[itaup:], work[iwork:], lwork-iwork)
  1163  
  1164  						// Multiply right bidiagonalizing vectors in A by Q in VT.
  1165  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1166  							a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1167  						iwork = ie + m
  1168  
  1169  						// Perform bidiagonal QR iteration, computing right singular
  1170  						// vectors of A in VT.
  1171  						ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:],
  1172  							vt, ldvt, work, 1, work, 1, work[iwork:])
  1173  					}
  1174  				} else if wantuo {
  1175  					panic(noSVDO)
  1176  				} else if wantuas {
  1177  					// Path 9t.
  1178  					if lwork >= m*m+max(max(m+n, 4*m), bdspac) {
  1179  						// Sufficient workspace for a fast algorithm.
  1180  						iu := 0
  1181  
  1182  						var ldworku int
  1183  						if lwork >= wrkbl+lda*m {
  1184  							ldworku = lda
  1185  						} else {
  1186  							ldworku = m
  1187  						}
  1188  						itau := iu + ldworku*m
  1189  						iwork := itau + m
  1190  
  1191  						// Generate A = L * Q copying result to VT.
  1192  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1193  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1194  
  1195  						// Generate Q in VT.
  1196  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1197  
  1198  						// Copy L to work[iu:], zeroing out above it.
  1199  						impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku)
  1200  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku)
  1201  						ie = itau
  1202  						itauq := ie + m
  1203  						itaup := itauq + m
  1204  						iwork = itaup + m
  1205  
  1206  						// Bidiagonalize L in work[iu:], copying result to U.
  1207  						impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:],
  1208  							work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1209  						impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu)
  1210  
  1211  						// Generate right bidiagonalizing vectors in work[iu:].
  1212  						impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku,
  1213  							work[itaup:], work[iwork:], lwork-iwork)
  1214  
  1215  						// Generate left bidiagonalizing vectors in U.
  1216  						impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1217  						iwork = ie + m
  1218  
  1219  						// Perform bidiagonal QR iteration, computing left singular
  1220  						// vectors of L in U and computing right singular vectors
  1221  						// of L in work[iu:].
  1222  						ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:],
  1223  							work[iu:], ldworku, u, ldu, work, 1, work[iwork:])
  1224  
  1225  						// Multiply right singular vectors of L in work[iu:]
  1226  						// Q in VT, storing result in A.
  1227  						bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1,
  1228  							work[iu:], ldworku, vt, ldvt, 0, a, lda)
  1229  
  1230  						// Copy right singular vectors of A from A to VT.
  1231  						impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt)
  1232  					} else {
  1233  						// Insufficient workspace for a fast algorithm.
  1234  						itau := 0
  1235  						iwork := itau + m
  1236  
  1237  						// Compute A = L * Q, copying result to VT.
  1238  						impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork)
  1239  						impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1240  
  1241  						// Generate Q in VT.
  1242  						impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork)
  1243  
  1244  						// Copy L to U, zeroing out above it.
  1245  						impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1246  						impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu)
  1247  
  1248  						ie = itau
  1249  						itauq := ie + m
  1250  						itaup := itauq + m
  1251  						iwork = itaup + m
  1252  
  1253  						// Bidiagonalize L in U.
  1254  						impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:],
  1255  							work[itaup:], work[iwork:], lwork-iwork)
  1256  
  1257  						// Multiply right bidiagonalizing vectors in U by Q in VT.
  1258  						impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m,
  1259  							u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork)
  1260  
  1261  						// Generate left bidiagonalizing vectors in U.
  1262  						impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1263  						iwork = ie + m
  1264  
  1265  						// Perform bidiagonal QR iteration, computing left singular
  1266  						// vectors of A in U and computing right singular vectors
  1267  						// of A in VT.
  1268  						ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:],
  1269  							vt, ldvt, u, ldu, work, 1, work[iwork:])
  1270  					}
  1271  				}
  1272  			}
  1273  		} else {
  1274  			// Path 10t.
  1275  			// N at least M, but not much larger.
  1276  			ie = 0
  1277  			itauq := ie + m
  1278  			itaup := itauq + m
  1279  			iwork := itaup + m
  1280  
  1281  			// Bidiagonalize A.
  1282  			impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork)
  1283  			if wantuas {
  1284  				// If left singular vectors desired in U, copy result to U and
  1285  				// generate left bidiagonalizing vectors in U.
  1286  				impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu)
  1287  				impl.Dorgbr(lapack.ApplyQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork)
  1288  			}
  1289  			if wantvas {
  1290  				// If right singular vectors desired in VT, copy result to VT
  1291  				// and generate right bidiagonalizing vectors in VT.
  1292  				impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt)
  1293  				var nrvt int
  1294  				if wantva {
  1295  					nrvt = n
  1296  				} else {
  1297  					nrvt = m
  1298  				}
  1299  				impl.Dorgbr(lapack.ApplyP, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork)
  1300  			}
  1301  			if wantuo {
  1302  				panic(noSVDO)
  1303  			}
  1304  			if wantvo {
  1305  				panic(noSVDO)
  1306  			}
  1307  			iwork = ie + m
  1308  			var nru, ncvt int
  1309  			if wantuas || wantuo {
  1310  				nru = m
  1311  			}
  1312  			if wantvas || wantvo {
  1313  				ncvt = n
  1314  			}
  1315  			if !wantuo && !wantvo {
  1316  				// Perform bidiagonal QR iteration, if desired, computing left
  1317  				// singular vectors in U and computing right singular vectors in
  1318  				// VT.
  1319  				ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:],
  1320  					vt, ldvt, u, ldu, work, 1, work[iwork:])
  1321  			} else {
  1322  				// There will be two branches when the implementation is complete.
  1323  				panic(noSVDO)
  1324  			}
  1325  		}
  1326  	}
  1327  	if !ok {
  1328  		if ie > 1 {
  1329  			for i := 0; i < minmn-1; i++ {
  1330  				work[i+1] = work[i+ie]
  1331  			}
  1332  		}
  1333  		if ie < 1 {
  1334  			for i := minmn - 2; i >= 0; i-- {
  1335  				work[i+1] = work[i+ie]
  1336  			}
  1337  		}
  1338  	}
  1339  	// Undo scaling if necessary.
  1340  	if iscl {
  1341  		if anrm > bignum {
  1342  			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn, 1, s, minmn)
  1343  		}
  1344  		if !ok && anrm > bignum {
  1345  			impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn-1, 1, work[minmn:], minmn)
  1346  		}
  1347  		if anrm < smlnum {
  1348  			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn, 1, s, minmn)
  1349  		}
  1350  		if !ok && anrm < smlnum {
  1351  			impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn-1, 1, work[minmn:], minmn)
  1352  		}
  1353  	}
  1354  	work[0] = float64(maxwrk)
  1355  	return ok
  1356  }