github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dgesvd.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math" 9 10 "github.com/jingcheng-WU/gonum/blas" 11 "github.com/jingcheng-WU/gonum/blas/blas64" 12 "github.com/jingcheng-WU/gonum/lapack" 13 ) 14 15 const noSVDO = "dgesvd: not coded for overwrite" 16 17 // Dgesvd computes the singular value decomposition of the input matrix A. 18 // 19 // The singular value decomposition is 20 // A = U * Sigma * Vᵀ 21 // where Sigma is an m×n diagonal matrix containing the singular values of A, 22 // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first 23 // min(m,n) columns of U and V are the left and right singular vectors of A 24 // respectively. 25 // 26 // jobU and jobVT are options for computing the singular vectors. The behavior 27 // is as follows 28 // jobU == lapack.SVDAll All m columns of U are returned in u 29 // jobU == lapack.SVDStore The first min(m,n) columns are returned in u 30 // jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a 31 // jobU == lapack.SVDNone The columns of U are not computed. 32 // The behavior is the same for jobVT and the rows of Vᵀ. At most one of jobU 33 // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise. 34 // 35 // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd 36 // the data is overwritten. On exit, A contains the appropriate singular vectors 37 // if either job is lapack.SVDOverwrite. 38 // 39 // s is a slice of length at least min(m,n) and on exit contains the singular 40 // values in decreasing order. 41 // 42 // u contains the left singular vectors on exit, stored column-wise. If 43 // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDStore u is 44 // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is 45 // not used. 46 // 47 // vt contains the left singular vectors on exit, stored row-wise. If 48 // jobV == lapack.SVDAll, vt is of size n×n. If jobVT == lapack.SVDStore vt is 49 // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is 50 // not used. 51 // 52 // work is a slice for storing temporary memory, and lwork is the usable size of 53 // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)). 54 // If lwork == -1, instead of performing Dgesvd, the optimal work length will be 55 // stored into work[0]. Dgesvd will panic if the working memory has insufficient 56 // storage. 57 // 58 // Dgesvd returns whether the decomposition successfully completed. 59 func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) { 60 if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite { 61 panic(noSVDO) 62 } 63 64 wantua := jobU == lapack.SVDAll 65 wantus := jobU == lapack.SVDStore 66 wantuas := wantua || wantus 67 wantuo := jobU == lapack.SVDOverwrite 68 wantun := jobU == lapack.SVDNone 69 if !(wantua || wantus || wantuo || wantun) { 70 panic(badSVDJob) 71 } 72 73 wantva := jobVT == lapack.SVDAll 74 wantvs := jobVT == lapack.SVDStore 75 wantvas := wantva || wantvs 76 wantvo := jobVT == lapack.SVDOverwrite 77 wantvn := jobVT == lapack.SVDNone 78 if !(wantva || wantvs || wantvo || wantvn) { 79 panic(badSVDJob) 80 } 81 82 if wantuo && wantvo { 83 panic(bothSVDOver) 84 } 85 86 minmn := min(m, n) 87 minwork := 1 88 if minmn > 0 { 89 minwork = max(3*minmn+max(m, n), 5*minmn) 90 } 91 switch { 92 case m < 0: 93 panic(mLT0) 94 case n < 0: 95 panic(nLT0) 96 case lda < max(1, n): 97 panic(badLdA) 98 case ldu < 1, wantua && ldu < m, wantus && ldu < minmn: 99 panic(badLdU) 100 case ldvt < 1 || (wantvas && ldvt < n): 101 panic(badLdVT) 102 case lwork < minwork && lwork != -1: 103 panic(badLWork) 104 case len(work) < max(1, lwork): 105 panic(shortWork) 106 } 107 108 // Quick return if possible. 109 if minmn == 0 { 110 work[0] = 1 111 return true 112 } 113 114 // Compute optimal workspace size for subroutines. 115 opts := string(jobU) + string(jobVT) 116 mnthr := impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0) 117 maxwrk := 1 118 var wrkbl, bdspac int 119 if m >= n { 120 bdspac = 5 * n 121 impl.Dgeqrf(m, n, a, lda, nil, work, -1) 122 lwork_dgeqrf := int(work[0]) 123 124 impl.Dorgqr(m, n, n, a, lda, nil, work, -1) 125 lwork_dorgqr_n := int(work[0]) 126 impl.Dorgqr(m, m, n, a, lda, nil, work, -1) 127 lwork_dorgqr_m := int(work[0]) 128 129 impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1) 130 lwork_dgebrd := int(work[0]) 131 132 impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, nil, work, -1) 133 lwork_dorgbr_p := int(work[0]) 134 135 impl.Dorgbr(lapack.GenerateQ, n, n, n, a, lda, nil, work, -1) 136 lwork_dorgbr_q := int(work[0]) 137 138 if m >= mnthr { 139 if wantun { 140 // Path 1 (m much larger than n, jobU == None) 141 maxwrk = n + lwork_dgeqrf 142 maxwrk = max(maxwrk, 3*n+lwork_dgebrd) 143 if wantvo || wantvas { 144 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p) 145 } 146 maxwrk = max(maxwrk, bdspac) 147 } else if wantuo && wantvn { 148 // Path 2 (m much larger than n, jobU == Overwrite, jobVT == None) 149 wrkbl = n + lwork_dgeqrf 150 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 151 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 152 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 153 wrkbl = max(wrkbl, bdspac) 154 maxwrk = max(n*n+wrkbl, n*n+m*n+n) 155 } else if wantuo && wantvas { 156 // Path 3 (m much larger than n, jobU == Overwrite, jobVT == Store or All) 157 wrkbl = n + lwork_dgeqrf 158 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 159 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 160 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 161 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 162 wrkbl = max(wrkbl, bdspac) 163 maxwrk = max(n*n+wrkbl, n*n+m*n+n) 164 } else if wantus && wantvn { 165 // Path 4 (m much larger than n, jobU == Store, jobVT == None) 166 wrkbl = n + lwork_dgeqrf 167 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 168 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 169 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 170 wrkbl = max(wrkbl, bdspac) 171 maxwrk = n*n + wrkbl 172 } else if wantus && wantvo { 173 // Path 5 (m much larger than n, jobU == Store, jobVT == Overwrite) 174 wrkbl = n + lwork_dgeqrf 175 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 176 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 177 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 178 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 179 wrkbl = max(wrkbl, bdspac) 180 maxwrk = 2*n*n + wrkbl 181 } else if wantus && wantvas { 182 // Path 6 (m much larger than n, jobU == Store, jobVT == Store or All) 183 wrkbl = n + lwork_dgeqrf 184 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 185 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 186 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 187 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 188 wrkbl = max(wrkbl, bdspac) 189 maxwrk = n*n + wrkbl 190 } else if wantua && wantvn { 191 // Path 7 (m much larger than n, jobU == All, jobVT == None) 192 wrkbl = n + lwork_dgeqrf 193 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 194 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 195 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 196 wrkbl = max(wrkbl, bdspac) 197 maxwrk = n*n + wrkbl 198 } else if wantua && wantvo { 199 // Path 8 (m much larger than n, jobU == All, jobVT == Overwrite) 200 wrkbl = n + lwork_dgeqrf 201 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 202 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 203 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 204 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 205 wrkbl = max(wrkbl, bdspac) 206 maxwrk = 2*n*n + wrkbl 207 } else if wantua && wantvas { 208 // Path 9 (m much larger than n, jobU == All, jobVT == Store or All) 209 wrkbl = n + lwork_dgeqrf 210 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 211 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 212 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 213 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 214 wrkbl = max(wrkbl, bdspac) 215 maxwrk = n*n + wrkbl 216 } 217 } else { 218 // Path 10 (m at least n, but not much larger) 219 impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1) 220 lwork_dgebrd := int(work[0]) 221 maxwrk = 3*n + lwork_dgebrd 222 if wantus || wantuo { 223 impl.Dorgbr(lapack.GenerateQ, m, n, n, a, lda, nil, work, -1) 224 lwork_dorgbr_q = int(work[0]) 225 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q) 226 } 227 if wantua { 228 impl.Dorgbr(lapack.GenerateQ, m, m, n, a, lda, nil, work, -1) 229 lwork_dorgbr_q := int(work[0]) 230 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q) 231 } 232 if !wantvn { 233 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p) 234 } 235 maxwrk = max(maxwrk, bdspac) 236 } 237 } else { 238 bdspac = 5 * m 239 240 impl.Dgelqf(m, n, a, lda, nil, work, -1) 241 lwork_dgelqf := int(work[0]) 242 243 impl.Dorglq(n, n, m, nil, n, nil, work, -1) 244 lwork_dorglq_n := int(work[0]) 245 impl.Dorglq(m, n, m, a, lda, nil, work, -1) 246 lwork_dorglq_m := int(work[0]) 247 248 impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1) 249 lwork_dgebrd := int(work[0]) 250 251 impl.Dorgbr(lapack.GeneratePT, m, m, m, a, n, nil, work, -1) 252 lwork_dorgbr_p := int(work[0]) 253 254 impl.Dorgbr(lapack.GenerateQ, m, m, m, a, n, nil, work, -1) 255 lwork_dorgbr_q := int(work[0]) 256 257 if n >= mnthr { 258 if wantvn { 259 // Path 1t (n much larger than m, jobVT == None) 260 maxwrk = m + lwork_dgelqf 261 maxwrk = max(maxwrk, 3*m+lwork_dgebrd) 262 if wantuo || wantuas { 263 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q) 264 } 265 maxwrk = max(maxwrk, bdspac) 266 } else if wantvo && wantun { 267 // Path 2t (n much larger than m, jobU == None, jobVT == Overwrite) 268 wrkbl = m + lwork_dgelqf 269 wrkbl = max(wrkbl, m+lwork_dorglq_m) 270 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 271 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 272 wrkbl = max(wrkbl, bdspac) 273 maxwrk = max(m*m+wrkbl, m*m+m*n+m) 274 } else if wantvo && wantuas { 275 // Path 3t (n much larger than m, jobU == Store or All, jobVT == Overwrite) 276 wrkbl = m + lwork_dgelqf 277 wrkbl = max(wrkbl, m+lwork_dorglq_m) 278 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 279 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 280 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 281 wrkbl = max(wrkbl, bdspac) 282 maxwrk = max(m*m+wrkbl, m*m+m*n+m) 283 } else if wantvs && wantun { 284 // Path 4t (n much larger than m, jobU == None, jobVT == Store) 285 wrkbl = m + lwork_dgelqf 286 wrkbl = max(wrkbl, m+lwork_dorglq_m) 287 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 288 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 289 wrkbl = max(wrkbl, bdspac) 290 maxwrk = m*m + wrkbl 291 } else if wantvs && wantuo { 292 // Path 5t (n much larger than m, jobU == Overwrite, jobVT == Store) 293 wrkbl = m + lwork_dgelqf 294 wrkbl = max(wrkbl, m+lwork_dorglq_m) 295 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 296 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 297 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 298 wrkbl = max(wrkbl, bdspac) 299 maxwrk = 2*m*m + wrkbl 300 } else if wantvs && wantuas { 301 // Path 6t (n much larger than m, jobU == Store or All, jobVT == Store) 302 wrkbl = m + lwork_dgelqf 303 wrkbl = max(wrkbl, m+lwork_dorglq_m) 304 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 305 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 306 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 307 wrkbl = max(wrkbl, bdspac) 308 maxwrk = m*m + wrkbl 309 } else if wantva && wantun { 310 // Path 7t (n much larger than m, jobU== None, jobVT == All) 311 wrkbl = m + lwork_dgelqf 312 wrkbl = max(wrkbl, m+lwork_dorglq_n) 313 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 314 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 315 wrkbl = max(wrkbl, bdspac) 316 maxwrk = m*m + wrkbl 317 } else if wantva && wantuo { 318 // Path 8t (n much larger than m, jobU == Overwrite, jobVT == All) 319 wrkbl = m + lwork_dgelqf 320 wrkbl = max(wrkbl, m+lwork_dorglq_n) 321 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 322 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 323 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 324 wrkbl = max(wrkbl, bdspac) 325 maxwrk = 2*m*m + wrkbl 326 } else if wantva && wantuas { 327 // Path 9t (n much larger than m, jobU == Store or All, jobVT == All) 328 wrkbl = m + lwork_dgelqf 329 wrkbl = max(wrkbl, m+lwork_dorglq_n) 330 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 331 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 332 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 333 wrkbl = max(wrkbl, bdspac) 334 maxwrk = m*m + wrkbl 335 } 336 } else { 337 // Path 10t (n greater than m, but not much larger) 338 impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1) 339 lwork_dgebrd = int(work[0]) 340 maxwrk = 3*m + lwork_dgebrd 341 if wantvs || wantvo { 342 impl.Dorgbr(lapack.GeneratePT, m, n, m, a, n, nil, work, -1) 343 lwork_dorgbr_p = int(work[0]) 344 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p) 345 } 346 if wantva { 347 impl.Dorgbr(lapack.GeneratePT, n, n, m, a, n, nil, work, -1) 348 lwork_dorgbr_p = int(work[0]) 349 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p) 350 } 351 if !wantun { 352 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q) 353 } 354 maxwrk = max(maxwrk, bdspac) 355 } 356 } 357 358 maxwrk = max(maxwrk, minwork) 359 if lwork == -1 { 360 work[0] = float64(maxwrk) 361 return true 362 } 363 364 if len(a) < (m-1)*lda+n { 365 panic(shortA) 366 } 367 if len(s) < minmn { 368 panic(shortS) 369 } 370 if (len(u) < (m-1)*ldu+m && wantua) || (len(u) < (m-1)*ldu+minmn && wantus) { 371 panic(shortU) 372 } 373 if (len(vt) < (n-1)*ldvt+n && wantva) || (len(vt) < (minmn-1)*ldvt+n && wantvs) { 374 panic(shortVT) 375 } 376 377 // Perform decomposition. 378 eps := dlamchE 379 smlnum := math.Sqrt(dlamchS) / eps 380 bignum := 1 / smlnum 381 382 // Scale A if max element outside range [smlnum, bignum]. 383 anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil) 384 var iscl bool 385 if anrm > 0 && anrm < smlnum { 386 iscl = true 387 impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda) 388 } else if anrm > bignum { 389 iscl = true 390 impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda) 391 } 392 393 bi := blas64.Implementation() 394 var ie int 395 if m >= n { 396 // If A has sufficiently more rows than columns, use the QR decomposition. 397 if m >= mnthr { 398 // m >> n 399 if wantun { 400 // Path 1. 401 itau := 0 402 iwork := itau + n 403 404 // Compute A = Q * R. 405 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 406 407 // Zero out below R. 408 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 409 ie = 0 410 itauq := ie + n 411 itaup := itauq + n 412 iwork = itaup + n 413 // Bidiagonalize R in A. 414 impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:], 415 work[itaup:], work[iwork:], lwork-iwork) 416 ncvt := 0 417 if wantvo || wantvas { 418 impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, work[itaup:], 419 work[iwork:], lwork-iwork) 420 ncvt = n 421 } 422 iwork = ie + n 423 424 // Perform bidiagonal QR iteration computing right singular vectors 425 // of A in A if desired. 426 ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:], 427 a, lda, work, 1, work, 1, work[iwork:]) 428 429 // If right singular vectors desired in VT, copy them there. 430 if wantvas { 431 impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt) 432 } 433 } else if wantuo && wantvn { 434 // Path 2 435 panic(noSVDO) 436 } else if wantuo && wantvas { 437 // Path 3 438 panic(noSVDO) 439 } else if wantus { 440 if wantvn { 441 // Path 4 442 if lwork >= n*n+max(4*n, bdspac) { 443 // Sufficient workspace for a fast algorithm. 444 ir := 0 445 var ldworkr int 446 if lwork >= wrkbl+lda*n { 447 ldworkr = lda 448 } else { 449 ldworkr = n 450 } 451 itau := ir + ldworkr*n 452 iwork := itau + n 453 // Compute A = Q * R. 454 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 455 456 // Copy R to work[ir:], zeroing out below it. 457 impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr) 458 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr) 459 460 // Generate Q in A. 461 impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 462 ie := itau 463 itauq := ie + n 464 itaup := itauq + n 465 iwork = itaup + n 466 467 // Bidiagonalize R in work[ir:]. 468 impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:], 469 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 470 471 // Generate left vectors bidiagonalizing R in work[ir:]. 472 impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr, 473 work[itauq:], work[iwork:], lwork-iwork) 474 iwork = ie + n 475 476 // Perform bidiagonal QR iteration, compuing left singular 477 // vectors of R in work[ir:]. 478 ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1, 479 work[ir:], ldworkr, work, 1, work[iwork:]) 480 481 // Multiply Q in A by left singular vectors of R in 482 // work[ir:], storing result in U. 483 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda, 484 work[ir:], ldworkr, 0, u, ldu) 485 } else { 486 // Insufficient workspace for a fast algorithm. 487 itau := 0 488 iwork := itau + n 489 490 // Compute A = Q*R, copying result to U. 491 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 492 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 493 494 // Generate Q in U. 495 impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 496 ie := itau 497 itauq := ie + n 498 itaup := itauq + n 499 iwork = itaup + n 500 501 // Zero out below R in A. 502 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 503 504 // Bidiagonalize R in A. 505 impl.Dgebrd(n, n, a, lda, s, work[ie:], 506 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 507 508 // Multiply Q in U by left vectors bidiagonalizing R. 509 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 510 a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 511 iwork = ie + n 512 513 // Perform bidiagonal QR iteration, computing left 514 // singular vectors of A in U. 515 ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1, 516 u, ldu, work, 1, work[iwork:]) 517 } 518 } else if wantvo { 519 // Path 5 520 panic(noSVDO) 521 } else if wantvas { 522 // Path 6 523 if lwork >= n*n+max(4*n, bdspac) { 524 // Sufficient workspace for a fast algorithm. 525 iu := 0 526 var ldworku int 527 if lwork >= wrkbl+lda*n { 528 ldworku = lda 529 } else { 530 ldworku = n 531 } 532 itau := iu + ldworku*n 533 iwork := itau + n 534 535 // Compute A = Q * R. 536 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 537 // Copy R to work[iu:], zeroing out below it. 538 impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku) 539 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku) 540 541 // Generate Q in A. 542 impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 543 544 ie := itau 545 itauq := ie + n 546 itaup := itauq + n 547 iwork = itaup + n 548 549 // Bidiagonalize R in work[iu:], copying result to VT. 550 impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:], 551 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 552 impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt) 553 554 // Generate left bidiagonalizing vectors in work[iu:]. 555 impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku, 556 work[itauq:], work[iwork:], lwork-iwork) 557 558 // Generate right bidiagonalizing vectors in VT. 559 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 560 work[itaup:], work[iwork:], lwork-iwork) 561 iwork = ie + n 562 563 // Perform bidiagonal QR iteration, computing left singular 564 // vectors of R in work[iu:], and computing right singular 565 // vectors of R in VT. 566 ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:], 567 vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:]) 568 569 // Multiply Q in A by left singular vectors of R in 570 // work[iu:], storing result in U. 571 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda, 572 work[iu:], ldworku, 0, u, ldu) 573 } else { 574 // Insufficient workspace for a fast algorithm. 575 itau := 0 576 iwork := itau + n 577 578 // Compute A = Q * R, copying result to U. 579 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 580 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 581 582 // Generate Q in U. 583 impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 584 585 // Copy R to VT, zeroing out below it. 586 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 587 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt) 588 589 ie := itau 590 itauq := ie + n 591 itaup := itauq + n 592 iwork = itaup + n 593 594 // Bidiagonalize R in VT. 595 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 596 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 597 598 // Multiply Q in U by left bidiagonalizing vectors in VT. 599 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 600 vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 601 602 // Generate right bidiagonalizing vectors in VT. 603 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 604 work[itaup:], work[iwork:], lwork-iwork) 605 iwork = ie + n 606 607 // Perform bidiagonal QR iteration, computing left singular 608 // vectors of A in U and computing right singular vectors 609 // of A in VT. 610 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 611 vt, ldvt, u, ldu, work, 1, work[iwork:]) 612 } 613 } 614 } else if wantua { 615 if wantvn { 616 // Path 7 617 if lwork >= n*n+max(max(n+m, 4*n), bdspac) { 618 // Sufficient workspace for a fast algorithm. 619 ir := 0 620 var ldworkr int 621 if lwork >= wrkbl+lda*n { 622 ldworkr = lda 623 } else { 624 ldworkr = n 625 } 626 itau := ir + ldworkr*n 627 iwork := itau + n 628 629 // Compute A = Q*R, copying result to U. 630 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 631 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 632 633 // Copy R to work[ir:], zeroing out below it. 634 impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr) 635 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr) 636 637 // Generate Q in U. 638 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 639 ie := itau 640 itauq := ie + n 641 itaup := itauq + n 642 iwork = itaup + n 643 644 // Bidiagonalize R in work[ir:]. 645 impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:], 646 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 647 648 // Generate left bidiagonalizing vectors in work[ir:]. 649 impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr, 650 work[itauq:], work[iwork:], lwork-iwork) 651 iwork = ie + n 652 653 // Perform bidiagonal QR iteration, computing left singular 654 // vectors of R in work[ir:]. 655 ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1, 656 work[ir:], ldworkr, work, 1, work[iwork:]) 657 658 // Multiply Q in U by left singular vectors of R in 659 // work[ir:], storing result in A. 660 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu, 661 work[ir:], ldworkr, 0, a, lda) 662 663 // Copy left singular vectors of A from A to U. 664 impl.Dlacpy(blas.All, m, n, a, lda, u, ldu) 665 } else { 666 // Insufficient workspace for a fast algorithm. 667 itau := 0 668 iwork := itau + n 669 670 // Compute A = Q*R, copying result to U. 671 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 672 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 673 674 // Generate Q in U. 675 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 676 ie := itau 677 itauq := ie + n 678 itaup := itauq + n 679 iwork = itaup + n 680 681 // Zero out below R in A. 682 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 683 684 // Bidiagonalize R in A. 685 impl.Dgebrd(n, n, a, lda, s, work[ie:], 686 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 687 688 // Multiply Q in U by left bidiagonalizing vectors in A. 689 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 690 a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 691 iwork = ie + n 692 693 // Perform bidiagonal QR iteration, computing left 694 // singular vectors of A in U. 695 ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], 696 work, 1, u, ldu, work, 1, work[iwork:]) 697 } 698 } else if wantvo { 699 // Path 8. 700 panic(noSVDO) 701 } else if wantvas { 702 // Path 9. 703 if lwork >= n*n+max(max(n+m, 4*n), bdspac) { 704 // Sufficient workspace for a fast algorithm. 705 iu := 0 706 var ldworku int 707 if lwork >= wrkbl+lda*n { 708 ldworku = lda 709 } else { 710 ldworku = n 711 } 712 itau := iu + ldworku*n 713 iwork := itau + n 714 715 // Compute A = Q * R, copying result to U. 716 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 717 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 718 719 // Generate Q in U. 720 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 721 722 // Copy R to work[iu:], zeroing out below it. 723 impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku) 724 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku) 725 726 ie = itau 727 itauq := ie + n 728 itaup := itauq + n 729 iwork = itaup + n 730 731 // Bidiagonalize R in work[iu:], copying result to VT. 732 impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:], 733 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 734 impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt) 735 736 // Generate left bidiagonalizing vectors in work[iu:]. 737 impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku, 738 work[itauq:], work[iwork:], lwork-iwork) 739 740 // Generate right bidiagonalizing vectors in VT. 741 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 742 work[itaup:], work[iwork:], lwork-iwork) 743 iwork = ie + n 744 745 // Perform bidiagonal QR iteration, computing left singular 746 // vectors of R in work[iu:] and computing right 747 // singular vectors of R in VT. 748 ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:], 749 vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:]) 750 751 // Multiply Q in U by left singular vectors of R in 752 // work[iu:], storing result in A. 753 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, 754 u, ldu, work[iu:], ldworku, 0, a, lda) 755 756 // Copy left singular vectors of A from A to U. 757 impl.Dlacpy(blas.All, m, n, a, lda, u, ldu) 758 759 /* 760 // Bidiagonalize R in VT. 761 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 762 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 763 764 // Multiply Q in U by left bidiagonalizing vectors in VT. 765 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, 766 m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 767 768 // Generate right bidiagonalizing vectors in VT. 769 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 770 work[itaup:], work[iwork:], lwork-iwork) 771 iwork = ie + n 772 773 // Perform bidiagonal QR iteration, computing left singular 774 // vectors of A in U and computing right singular vectors 775 // of A in VT. 776 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 777 vt, ldvt, u, ldu, work, 1, work[iwork:]) 778 */ 779 } else { 780 // Insufficient workspace for a fast algorithm. 781 itau := 0 782 iwork := itau + n 783 784 // Compute A = Q*R, copying result to U. 785 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 786 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 787 788 // Generate Q in U. 789 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 790 791 // Copy R from A to VT, zeroing out below it. 792 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 793 if n > 1 { 794 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt) 795 } 796 797 ie := itau 798 itauq := ie + n 799 itaup := itauq + n 800 iwork = itaup + n 801 802 // Bidiagonalize R in VT. 803 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 804 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 805 806 // Multiply Q in U by left bidiagonalizing vectors in VT. 807 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, 808 m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 809 810 // Generate right bidiagonizing vectors in VT. 811 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 812 work[itaup:], work[iwork:], lwork-iwork) 813 iwork = ie + n 814 815 // Perform bidiagonal QR iteration, computing left singular 816 // vectors of A in U and computing right singular vectors 817 // of A in VT. 818 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 819 vt, ldvt, u, ldu, work, 1, work[iwork:]) 820 } 821 } 822 } 823 } else { 824 // Path 10. 825 // M at least N, but not much larger. 826 ie = 0 827 itauq := ie + n 828 itaup := itauq + n 829 iwork := itaup + n 830 831 // Bidiagonalize A. 832 impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], 833 work[itaup:], work[iwork:], lwork-iwork) 834 if wantuas { 835 // Left singular vectors are desired in U. Copy result to U and 836 // generate left biadiagonalizing vectors in U. 837 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 838 var ncu int 839 if wantus { 840 ncu = n 841 } 842 if wantua { 843 ncu = m 844 } 845 impl.Dorgbr(lapack.GenerateQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 846 } 847 if wantvas { 848 // Right singular vectors are desired in VT. Copy result to VT and 849 // generate left biadiagonalizing vectors in VT. 850 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 851 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork) 852 } 853 if wantuo { 854 panic(noSVDO) 855 } 856 if wantvo { 857 panic(noSVDO) 858 } 859 iwork = ie + n 860 var nru, ncvt int 861 if wantuas || wantuo { 862 nru = m 863 } 864 if wantun { 865 nru = 0 866 } 867 if wantvas || wantvo { 868 ncvt = n 869 } 870 if wantvn { 871 ncvt = 0 872 } 873 if !wantuo && !wantvo { 874 // Perform bidiagonal QR iteration, if desired, computing left 875 // singular vectors in U and right singular vectors in VT. 876 ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:], 877 vt, ldvt, u, ldu, work, 1, work[iwork:]) 878 } else { 879 // There will be two branches when the implementation is complete. 880 panic(noSVDO) 881 } 882 } 883 } else { 884 // A has more columns than rows. If A has sufficiently more columns than 885 // rows, first reduce using the LQ decomposition. 886 if n >= mnthr { 887 // n >> m. 888 if wantvn { 889 // Path 1t. 890 itau := 0 891 iwork := itau + m 892 893 // Compute A = L*Q. 894 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 895 896 // Zero out above L. 897 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 898 ie := 0 899 itauq := ie + m 900 itaup := itauq + m 901 iwork = itaup + m 902 903 // Bidiagonalize L in A. 904 impl.Dgebrd(m, m, a, lda, s, work[ie:itauq], 905 work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork) 906 if wantuo || wantuas { 907 impl.Dorgbr(lapack.GenerateQ, m, m, m, a, lda, 908 work[itauq:], work[iwork:], lwork-iwork) 909 } 910 iwork = ie + m 911 nru := 0 912 if wantuo || wantuas { 913 nru = m 914 } 915 916 // Perform bidiagonal QR iteration, computing left singular vectors 917 // of A in A if desired. 918 ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:], 919 work, 1, a, lda, work, 1, work[iwork:]) 920 921 // If left singular vectors desired in U, copy them there. 922 if wantuas { 923 impl.Dlacpy(blas.All, m, m, a, lda, u, ldu) 924 } 925 } else if wantvo && wantun { 926 // Path 2t. 927 panic(noSVDO) 928 } else if wantvo && wantuas { 929 // Path 3t. 930 panic(noSVDO) 931 } else if wantvs { 932 if wantun { 933 // Path 4t. 934 if lwork >= m*m+max(4*m, bdspac) { 935 // Sufficient workspace for a fast algorithm. 936 ir := 0 937 var ldworkr int 938 if lwork >= wrkbl+lda*m { 939 ldworkr = lda 940 } else { 941 ldworkr = m 942 } 943 itau := ir + ldworkr*m 944 iwork := itau + m 945 946 // Compute A = L*Q. 947 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 948 949 // Copy L to work[ir:], zeroing out above it. 950 impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr) 951 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr) 952 953 // Generate Q in A. 954 impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork) 955 ie := itau 956 itauq := ie + m 957 itaup := itauq + m 958 iwork = itaup + m 959 960 // Bidiagonalize L in work[ir:]. 961 impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:], 962 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 963 964 // Generate right vectors bidiagonalizing L in work[ir:]. 965 impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr, 966 work[itaup:], work[iwork:], lwork-iwork) 967 iwork = ie + m 968 969 // Perform bidiagonal QR iteration, computing right singular 970 // vectors of L in work[ir:]. 971 ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:], 972 work[ir:], ldworkr, work, 1, work, 1, work[iwork:]) 973 974 // Multiply right singular vectors of L in work[ir:] by 975 // Q in A, storing result in VT. 976 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 977 work[ir:], ldworkr, a, lda, 0, vt, ldvt) 978 } else { 979 // Insufficient workspace for a fast algorithm. 980 itau := 0 981 iwork := itau + m 982 983 // Compute A = L*Q. 984 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 985 986 // Copy result to VT. 987 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 988 989 // Generate Q in VT. 990 impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 991 ie := itau 992 itauq := ie + m 993 itaup := itauq + m 994 iwork = itaup + m 995 996 // Zero out above L in A. 997 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 998 999 // Bidiagonalize L in A. 1000 impl.Dgebrd(m, m, a, lda, s, work[ie:], 1001 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1002 1003 // Multiply right vectors bidiagonalizing L by Q in VT. 1004 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1005 a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1006 iwork = ie + m 1007 1008 // Perform bidiagonal QR iteration, computing right 1009 // singular vectors of A in VT. 1010 ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:], 1011 vt, ldvt, work, 1, work, 1, work[iwork:]) 1012 } 1013 } else if wantuo { 1014 // Path 5t. 1015 panic(noSVDO) 1016 } else if wantuas { 1017 // Path 6t. 1018 if lwork >= m*m+max(4*m, bdspac) { 1019 // Sufficient workspace for a fast algorithm. 1020 iu := 0 1021 var ldworku int 1022 if lwork >= wrkbl+lda*m { 1023 ldworku = lda 1024 } else { 1025 ldworku = m 1026 } 1027 itau := iu + ldworku*m 1028 iwork := itau + m 1029 1030 // Compute A = L*Q. 1031 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1032 1033 // Copy L to work[iu:], zeroing out above it. 1034 impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku) 1035 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku) 1036 1037 // Generate Q in A. 1038 impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork) 1039 ie := itau 1040 itauq := ie + m 1041 itaup := itauq + m 1042 iwork = itaup + m 1043 1044 // Bidiagonalize L in work[iu:], copying result to U. 1045 impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], 1046 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1047 impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu) 1048 1049 // Generate right bidiagionalizing vectors in work[iu:]. 1050 impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku, 1051 work[itaup:], work[iwork:], lwork-iwork) 1052 1053 // Generate left bidiagonalizing vectors in U. 1054 impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1055 iwork = ie + m 1056 1057 // Perform bidiagonal QR iteration, computing left singular 1058 // vectors of L in U and computing right singular vectors of 1059 // L in work[iu:]. 1060 ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:], 1061 work[iu:], ldworku, u, ldu, work, 1, work[iwork:]) 1062 1063 // Multiply right singular vectors of L in work[iu:] by 1064 // Q in A, storing result in VT. 1065 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1066 work[iu:], ldworku, a, lda, 0, vt, ldvt) 1067 } else { 1068 // Insufficient workspace for a fast algorithm. 1069 itau := 0 1070 iwork := itau + m 1071 1072 // Compute A = L*Q, copying result to VT. 1073 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1074 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1075 1076 // Generate Q in VT. 1077 impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1078 1079 // Copy L to U, zeroing out above it. 1080 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1081 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu) 1082 1083 ie := itau 1084 itauq := ie + m 1085 itaup := itauq + m 1086 iwork = itaup + m 1087 1088 // Bidiagonalize L in U. 1089 impl.Dgebrd(m, m, u, ldu, s, work[ie:], 1090 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1091 1092 // Multiply right bidiagonalizing vectors in U by Q in VT. 1093 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1094 u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1095 1096 // Generate left bidiagonalizing vectors in U. 1097 impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1098 iwork = ie + m 1099 1100 // Perform bidiagonal QR iteration, computing left singular 1101 // vectors of A in U and computing right singular vectors 1102 // of A in VT. 1103 ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt, 1104 u, ldu, work, 1, work[iwork:]) 1105 } 1106 } 1107 } else if wantva { 1108 if wantun { 1109 // Path 7t. 1110 if lwork >= m*m+max(max(n+m, 4*m), bdspac) { 1111 // Sufficient workspace for a fast algorithm. 1112 ir := 0 1113 var ldworkr int 1114 if lwork >= wrkbl+lda*m { 1115 ldworkr = lda 1116 } else { 1117 ldworkr = m 1118 } 1119 itau := ir + ldworkr*m 1120 iwork := itau + m 1121 1122 // Compute A = L*Q, copying result to VT. 1123 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1124 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1125 1126 // Copy L to work[ir:], zeroing out above it. 1127 impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr) 1128 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr) 1129 1130 // Generate Q in VT. 1131 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1132 1133 ie := itau 1134 itauq := ie + m 1135 itaup := itauq + m 1136 iwork = itaup + m 1137 1138 // Bidiagonalize L in work[ir:]. 1139 impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:], 1140 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1141 1142 // Generate right bidiagonalizing vectors in work[ir:]. 1143 impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr, 1144 work[itaup:], work[iwork:], lwork-iwork) 1145 iwork = ie + m 1146 1147 // Perform bidiagonal QR iteration, computing right 1148 // singular vectors of L in work[ir:]. 1149 ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:], 1150 work[ir:], ldworkr, work, 1, work, 1, work[iwork:]) 1151 1152 // Multiply right singular vectors of L in work[ir:] by 1153 // Q in VT, storing result in A. 1154 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1155 work[ir:], ldworkr, vt, ldvt, 0, a, lda) 1156 1157 // Copy right singular vectors of A from A to VT. 1158 impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt) 1159 } else { 1160 // Insufficient workspace for a fast algorithm. 1161 itau := 0 1162 iwork := itau + m 1163 // Compute A = L * Q, copying result to VT. 1164 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1165 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1166 1167 // Generate Q in VT. 1168 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1169 1170 ie := itau 1171 itauq := ie + m 1172 itaup := itauq + m 1173 iwork = itaup + m 1174 1175 // Zero out above L in A. 1176 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 1177 1178 // Bidiagonalize L in A. 1179 impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:], 1180 work[itaup:], work[iwork:], lwork-iwork) 1181 1182 // Multiply right bidiagonalizing vectors in A by Q in VT. 1183 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1184 a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1185 iwork = ie + m 1186 1187 // Perform bidiagonal QR iteration, computing right singular 1188 // vectors of A in VT. 1189 ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:], 1190 vt, ldvt, work, 1, work, 1, work[iwork:]) 1191 } 1192 } else if wantuo { 1193 panic(noSVDO) 1194 } else if wantuas { 1195 // Path 9t. 1196 if lwork >= m*m+max(max(m+n, 4*m), bdspac) { 1197 // Sufficient workspace for a fast algorithm. 1198 iu := 0 1199 1200 var ldworku int 1201 if lwork >= wrkbl+lda*m { 1202 ldworku = lda 1203 } else { 1204 ldworku = m 1205 } 1206 itau := iu + ldworku*m 1207 iwork := itau + m 1208 1209 // Generate A = L * Q copying result to VT. 1210 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1211 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1212 1213 // Generate Q in VT. 1214 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1215 1216 // Copy L to work[iu:], zeroing out above it. 1217 impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku) 1218 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku) 1219 ie = itau 1220 itauq := ie + m 1221 itaup := itauq + m 1222 iwork = itaup + m 1223 1224 // Bidiagonalize L in work[iu:], copying result to U. 1225 impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], 1226 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1227 impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu) 1228 1229 // Generate right bidiagonalizing vectors in work[iu:]. 1230 impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku, 1231 work[itaup:], work[iwork:], lwork-iwork) 1232 1233 // Generate left bidiagonalizing vectors in U. 1234 impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1235 iwork = ie + m 1236 1237 // Perform bidiagonal QR iteration, computing left singular 1238 // vectors of L in U and computing right singular vectors 1239 // of L in work[iu:]. 1240 ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:], 1241 work[iu:], ldworku, u, ldu, work, 1, work[iwork:]) 1242 1243 // Multiply right singular vectors of L in work[iu:] 1244 // Q in VT, storing result in A. 1245 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1246 work[iu:], ldworku, vt, ldvt, 0, a, lda) 1247 1248 // Copy right singular vectors of A from A to VT. 1249 impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt) 1250 } else { 1251 // Insufficient workspace for a fast algorithm. 1252 itau := 0 1253 iwork := itau + m 1254 1255 // Compute A = L * Q, copying result to VT. 1256 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1257 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1258 1259 // Generate Q in VT. 1260 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1261 1262 // Copy L to U, zeroing out above it. 1263 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1264 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu) 1265 1266 ie = itau 1267 itauq := ie + m 1268 itaup := itauq + m 1269 iwork = itaup + m 1270 1271 // Bidiagonalize L in U. 1272 impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:], 1273 work[itaup:], work[iwork:], lwork-iwork) 1274 1275 // Multiply right bidiagonalizing vectors in U by Q in VT. 1276 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1277 u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1278 1279 // Generate left bidiagonalizing vectors in U. 1280 impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1281 iwork = ie + m 1282 1283 // Perform bidiagonal QR iteration, computing left singular 1284 // vectors of A in U and computing right singular vectors 1285 // of A in VT. 1286 ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], 1287 vt, ldvt, u, ldu, work, 1, work[iwork:]) 1288 } 1289 } 1290 } 1291 } else { 1292 // Path 10t. 1293 // N at least M, but not much larger. 1294 ie = 0 1295 itauq := ie + m 1296 itaup := itauq + m 1297 iwork := itaup + m 1298 1299 // Bidiagonalize A. 1300 impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1301 if wantuas { 1302 // If left singular vectors desired in U, copy result to U and 1303 // generate left bidiagonalizing vectors in U. 1304 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1305 impl.Dorgbr(lapack.GenerateQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1306 } 1307 if wantvas { 1308 // If right singular vectors desired in VT, copy result to VT 1309 // and generate right bidiagonalizing vectors in VT. 1310 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1311 var nrvt int 1312 if wantva { 1313 nrvt = n 1314 } else { 1315 nrvt = m 1316 } 1317 impl.Dorgbr(lapack.GeneratePT, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork) 1318 } 1319 if wantuo { 1320 panic(noSVDO) 1321 } 1322 if wantvo { 1323 panic(noSVDO) 1324 } 1325 iwork = ie + m 1326 var nru, ncvt int 1327 if wantuas || wantuo { 1328 nru = m 1329 } 1330 if wantvas || wantvo { 1331 ncvt = n 1332 } 1333 if !wantuo && !wantvo { 1334 // Perform bidiagonal QR iteration, if desired, computing left 1335 // singular vectors in U and computing right singular vectors in 1336 // VT. 1337 ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:], 1338 vt, ldvt, u, ldu, work, 1, work[iwork:]) 1339 } else { 1340 // There will be two branches when the implementation is complete. 1341 panic(noSVDO) 1342 } 1343 } 1344 } 1345 if !ok { 1346 if ie > 1 { 1347 for i := 0; i < minmn-1; i++ { 1348 work[i+1] = work[i+ie] 1349 } 1350 } 1351 if ie < 1 { 1352 for i := minmn - 2; i >= 0; i-- { 1353 work[i+1] = work[i+ie] 1354 } 1355 } 1356 } 1357 // Undo scaling if necessary. 1358 if iscl { 1359 if anrm > bignum { 1360 impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn, s, minmn) 1361 } 1362 if !ok && anrm > bignum { 1363 impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn-1, work[1:], minmn) 1364 } 1365 if anrm < smlnum { 1366 impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn, s, minmn) 1367 } 1368 if !ok && anrm < smlnum { 1369 impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn-1, work[1:], minmn) 1370 } 1371 } 1372 work[0] = float64(maxwrk) 1373 return ok 1374 }