gonum.org/v1/gonum@v0.14.0/lapack/gonum/dgesvd.go (about) 1 // Copyright ©2015 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math" 9 10 "gonum.org/v1/gonum/blas" 11 "gonum.org/v1/gonum/blas/blas64" 12 "gonum.org/v1/gonum/lapack" 13 ) 14 15 const noSVDO = "dgesvd: not coded for overwrite" 16 17 // Dgesvd computes the singular value decomposition of the input matrix A. 18 // 19 // The singular value decomposition is 20 // 21 // A = U * Sigma * Vᵀ 22 // 23 // where Sigma is an m×n diagonal matrix containing the singular values of A, 24 // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first 25 // min(m,n) columns of U and V are the left and right singular vectors of A 26 // respectively. 27 // 28 // jobU and jobVT are options for computing the singular vectors. The behavior 29 // is as follows 30 // 31 // jobU == lapack.SVDAll All m columns of U are returned in u 32 // jobU == lapack.SVDStore The first min(m,n) columns are returned in u 33 // jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a 34 // jobU == lapack.SVDNone The columns of U are not computed. 35 // 36 // The behavior is the same for jobVT and the rows of Vᵀ. At most one of jobU 37 // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise. 38 // 39 // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd 40 // the data is overwritten. On exit, A contains the appropriate singular vectors 41 // if either job is lapack.SVDOverwrite. 42 // 43 // s is a slice of length at least min(m,n) and on exit contains the singular 44 // values in decreasing order. 45 // 46 // u contains the left singular vectors on exit, stored column-wise. If 47 // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDStore u is 48 // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is 49 // not used. 50 // 51 // vt contains the left singular vectors on exit, stored row-wise. If 52 // jobV == lapack.SVDAll, vt is of size n×n. If jobVT == lapack.SVDStore vt is 53 // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is 54 // not used. 55 // 56 // work is a slice for storing temporary memory, and lwork is the usable size of 57 // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)). 58 // If lwork == -1, instead of performing Dgesvd, the optimal work length will be 59 // stored into work[0]. Dgesvd will panic if the working memory has insufficient 60 // storage. 61 // 62 // Dgesvd returns whether the decomposition successfully completed. 63 func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) { 64 if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite { 65 panic(noSVDO) 66 } 67 68 wantua := jobU == lapack.SVDAll 69 wantus := jobU == lapack.SVDStore 70 wantuas := wantua || wantus 71 wantuo := jobU == lapack.SVDOverwrite 72 wantun := jobU == lapack.SVDNone 73 if !(wantua || wantus || wantuo || wantun) { 74 panic(badSVDJob) 75 } 76 77 wantva := jobVT == lapack.SVDAll 78 wantvs := jobVT == lapack.SVDStore 79 wantvas := wantva || wantvs 80 wantvo := jobVT == lapack.SVDOverwrite 81 wantvn := jobVT == lapack.SVDNone 82 if !(wantva || wantvs || wantvo || wantvn) { 83 panic(badSVDJob) 84 } 85 86 if wantuo && wantvo { 87 panic(bothSVDOver) 88 } 89 90 minmn := min(m, n) 91 minwork := 1 92 if minmn > 0 { 93 minwork = max(3*minmn+max(m, n), 5*minmn) 94 } 95 switch { 96 case m < 0: 97 panic(mLT0) 98 case n < 0: 99 panic(nLT0) 100 case lda < max(1, n): 101 panic(badLdA) 102 case ldu < 1, wantua && ldu < m, wantus && ldu < minmn: 103 panic(badLdU) 104 case ldvt < 1 || (wantvas && ldvt < n): 105 panic(badLdVT) 106 case lwork < minwork && lwork != -1: 107 panic(badLWork) 108 case len(work) < max(1, lwork): 109 panic(shortWork) 110 } 111 112 // Quick return if possible. 113 if minmn == 0 { 114 work[0] = 1 115 return true 116 } 117 118 // Compute optimal workspace size for subroutines. 119 opts := string(jobU) + string(jobVT) 120 mnthr := impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0) 121 maxwrk := 1 122 var wrkbl, bdspac int 123 if m >= n { 124 bdspac = 5 * n 125 impl.Dgeqrf(m, n, a, lda, nil, work, -1) 126 lwork_dgeqrf := int(work[0]) 127 128 impl.Dorgqr(m, n, n, a, lda, nil, work, -1) 129 lwork_dorgqr_n := int(work[0]) 130 impl.Dorgqr(m, m, n, a, lda, nil, work, -1) 131 lwork_dorgqr_m := int(work[0]) 132 133 impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1) 134 lwork_dgebrd := int(work[0]) 135 136 impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, nil, work, -1) 137 lwork_dorgbr_p := int(work[0]) 138 139 impl.Dorgbr(lapack.GenerateQ, n, n, n, a, lda, nil, work, -1) 140 lwork_dorgbr_q := int(work[0]) 141 142 if m >= mnthr { 143 if wantun { 144 // Path 1 (m much larger than n, jobU == None) 145 maxwrk = n + lwork_dgeqrf 146 maxwrk = max(maxwrk, 3*n+lwork_dgebrd) 147 if wantvo || wantvas { 148 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p) 149 } 150 maxwrk = max(maxwrk, bdspac) 151 } else if wantuo && wantvn { 152 // Path 2 (m much larger than n, jobU == Overwrite, jobVT == None) 153 wrkbl = n + lwork_dgeqrf 154 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 155 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 156 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 157 wrkbl = max(wrkbl, bdspac) 158 maxwrk = max(n*n+wrkbl, n*n+m*n+n) 159 } else if wantuo && wantvas { 160 // Path 3 (m much larger than n, jobU == Overwrite, jobVT == Store or All) 161 wrkbl = n + lwork_dgeqrf 162 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 163 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 164 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 165 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 166 wrkbl = max(wrkbl, bdspac) 167 maxwrk = max(n*n+wrkbl, n*n+m*n+n) 168 } else if wantus && wantvn { 169 // Path 4 (m much larger than n, jobU == Store, jobVT == None) 170 wrkbl = n + lwork_dgeqrf 171 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 172 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 173 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 174 wrkbl = max(wrkbl, bdspac) 175 maxwrk = n*n + wrkbl 176 } else if wantus && wantvo { 177 // Path 5 (m much larger than n, jobU == Store, jobVT == Overwrite) 178 wrkbl = n + lwork_dgeqrf 179 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 180 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 181 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 182 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 183 wrkbl = max(wrkbl, bdspac) 184 maxwrk = 2*n*n + wrkbl 185 } else if wantus && wantvas { 186 // Path 6 (m much larger than n, jobU == Store, jobVT == Store or All) 187 wrkbl = n + lwork_dgeqrf 188 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 189 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 190 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 191 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 192 wrkbl = max(wrkbl, bdspac) 193 maxwrk = n*n + wrkbl 194 } else if wantua && wantvn { 195 // Path 7 (m much larger than n, jobU == All, jobVT == None) 196 wrkbl = n + lwork_dgeqrf 197 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 198 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 199 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 200 wrkbl = max(wrkbl, bdspac) 201 maxwrk = n*n + wrkbl 202 } else if wantua && wantvo { 203 // Path 8 (m much larger than n, jobU == All, jobVT == Overwrite) 204 wrkbl = n + lwork_dgeqrf 205 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 206 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 207 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 208 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 209 wrkbl = max(wrkbl, bdspac) 210 maxwrk = 2*n*n + wrkbl 211 } else if wantua && wantvas { 212 // Path 9 (m much larger than n, jobU == All, jobVT == Store or All) 213 wrkbl = n + lwork_dgeqrf 214 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 215 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 216 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 217 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 218 wrkbl = max(wrkbl, bdspac) 219 maxwrk = n*n + wrkbl 220 } 221 } else { 222 // Path 10 (m at least n, but not much larger) 223 impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1) 224 lwork_dgebrd := int(work[0]) 225 maxwrk = 3*n + lwork_dgebrd 226 if wantus || wantuo { 227 impl.Dorgbr(lapack.GenerateQ, m, n, n, a, lda, nil, work, -1) 228 lwork_dorgbr_q = int(work[0]) 229 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q) 230 } 231 if wantua { 232 impl.Dorgbr(lapack.GenerateQ, m, m, n, a, lda, nil, work, -1) 233 lwork_dorgbr_q := int(work[0]) 234 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q) 235 } 236 if !wantvn { 237 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p) 238 } 239 maxwrk = max(maxwrk, bdspac) 240 } 241 } else { 242 bdspac = 5 * m 243 244 impl.Dgelqf(m, n, a, lda, nil, work, -1) 245 lwork_dgelqf := int(work[0]) 246 247 impl.Dorglq(n, n, m, nil, n, nil, work, -1) 248 lwork_dorglq_n := int(work[0]) 249 impl.Dorglq(m, n, m, a, lda, nil, work, -1) 250 lwork_dorglq_m := int(work[0]) 251 252 impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1) 253 lwork_dgebrd := int(work[0]) 254 255 impl.Dorgbr(lapack.GeneratePT, m, m, m, a, n, nil, work, -1) 256 lwork_dorgbr_p := int(work[0]) 257 258 impl.Dorgbr(lapack.GenerateQ, m, m, m, a, n, nil, work, -1) 259 lwork_dorgbr_q := int(work[0]) 260 261 if n >= mnthr { 262 if wantvn { 263 // Path 1t (n much larger than m, jobVT == None) 264 maxwrk = m + lwork_dgelqf 265 maxwrk = max(maxwrk, 3*m+lwork_dgebrd) 266 if wantuo || wantuas { 267 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q) 268 } 269 maxwrk = max(maxwrk, bdspac) 270 } else if wantvo && wantun { 271 // Path 2t (n much larger than m, jobU == None, jobVT == Overwrite) 272 wrkbl = m + lwork_dgelqf 273 wrkbl = max(wrkbl, m+lwork_dorglq_m) 274 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 275 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 276 wrkbl = max(wrkbl, bdspac) 277 maxwrk = max(m*m+wrkbl, m*m+m*n+m) 278 } else if wantvo && wantuas { 279 // Path 3t (n much larger than m, jobU == Store or All, jobVT == Overwrite) 280 wrkbl = m + lwork_dgelqf 281 wrkbl = max(wrkbl, m+lwork_dorglq_m) 282 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 283 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 284 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 285 wrkbl = max(wrkbl, bdspac) 286 maxwrk = max(m*m+wrkbl, m*m+m*n+m) 287 } else if wantvs && wantun { 288 // Path 4t (n much larger than m, jobU == None, jobVT == Store) 289 wrkbl = m + lwork_dgelqf 290 wrkbl = max(wrkbl, m+lwork_dorglq_m) 291 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 292 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 293 wrkbl = max(wrkbl, bdspac) 294 maxwrk = m*m + wrkbl 295 } else if wantvs && wantuo { 296 // Path 5t (n much larger than m, jobU == Overwrite, jobVT == Store) 297 wrkbl = m + lwork_dgelqf 298 wrkbl = max(wrkbl, m+lwork_dorglq_m) 299 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 300 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 301 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 302 wrkbl = max(wrkbl, bdspac) 303 maxwrk = 2*m*m + wrkbl 304 } else if wantvs && wantuas { 305 // Path 6t (n much larger than m, jobU == Store or All, jobVT == Store) 306 wrkbl = m + lwork_dgelqf 307 wrkbl = max(wrkbl, m+lwork_dorglq_m) 308 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 309 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 310 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 311 wrkbl = max(wrkbl, bdspac) 312 maxwrk = m*m + wrkbl 313 } else if wantva && wantun { 314 // Path 7t (n much larger than m, jobU== None, jobVT == All) 315 wrkbl = m + lwork_dgelqf 316 wrkbl = max(wrkbl, m+lwork_dorglq_n) 317 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 318 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 319 wrkbl = max(wrkbl, bdspac) 320 maxwrk = m*m + wrkbl 321 } else if wantva && wantuo { 322 // Path 8t (n much larger than m, jobU == Overwrite, jobVT == All) 323 wrkbl = m + lwork_dgelqf 324 wrkbl = max(wrkbl, m+lwork_dorglq_n) 325 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 326 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 327 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 328 wrkbl = max(wrkbl, bdspac) 329 maxwrk = 2*m*m + wrkbl 330 } else if wantva && wantuas { 331 // Path 9t (n much larger than m, jobU == Store or All, jobVT == All) 332 wrkbl = m + lwork_dgelqf 333 wrkbl = max(wrkbl, m+lwork_dorglq_n) 334 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 335 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 336 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 337 wrkbl = max(wrkbl, bdspac) 338 maxwrk = m*m + wrkbl 339 } 340 } else { 341 // Path 10t (n greater than m, but not much larger) 342 impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1) 343 lwork_dgebrd = int(work[0]) 344 maxwrk = 3*m + lwork_dgebrd 345 if wantvs || wantvo { 346 impl.Dorgbr(lapack.GeneratePT, m, n, m, a, n, nil, work, -1) 347 lwork_dorgbr_p = int(work[0]) 348 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p) 349 } 350 if wantva { 351 impl.Dorgbr(lapack.GeneratePT, n, n, m, a, n, nil, work, -1) 352 lwork_dorgbr_p = int(work[0]) 353 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p) 354 } 355 if !wantun { 356 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q) 357 } 358 maxwrk = max(maxwrk, bdspac) 359 } 360 } 361 362 maxwrk = max(maxwrk, minwork) 363 if lwork == -1 { 364 work[0] = float64(maxwrk) 365 return true 366 } 367 368 if len(a) < (m-1)*lda+n { 369 panic(shortA) 370 } 371 if len(s) < minmn { 372 panic(shortS) 373 } 374 if (len(u) < (m-1)*ldu+m && wantua) || (len(u) < (m-1)*ldu+minmn && wantus) { 375 panic(shortU) 376 } 377 if (len(vt) < (n-1)*ldvt+n && wantva) || (len(vt) < (minmn-1)*ldvt+n && wantvs) { 378 panic(shortVT) 379 } 380 381 // Perform decomposition. 382 eps := dlamchE 383 smlnum := math.Sqrt(dlamchS) / eps 384 bignum := 1 / smlnum 385 386 // Scale A if max element outside range [smlnum, bignum]. 387 anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil) 388 var iscl bool 389 if anrm > 0 && anrm < smlnum { 390 iscl = true 391 impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda) 392 } else if anrm > bignum { 393 iscl = true 394 impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda) 395 } 396 397 bi := blas64.Implementation() 398 var ie int 399 if m >= n { 400 // If A has sufficiently more rows than columns, use the QR decomposition. 401 if m >= mnthr { 402 // m >> n 403 if wantun { 404 // Path 1. 405 itau := 0 406 iwork := itau + n 407 408 // Compute A = Q * R. 409 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 410 411 // Zero out below R. 412 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 413 ie = 0 414 itauq := ie + n 415 itaup := itauq + n 416 iwork = itaup + n 417 // Bidiagonalize R in A. 418 impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:], 419 work[itaup:], work[iwork:], lwork-iwork) 420 ncvt := 0 421 if wantvo || wantvas { 422 impl.Dorgbr(lapack.GeneratePT, n, n, n, a, lda, work[itaup:], 423 work[iwork:], lwork-iwork) 424 ncvt = n 425 } 426 iwork = ie + n 427 428 // Perform bidiagonal QR iteration computing right singular vectors 429 // of A in A if desired. 430 ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:], 431 a, lda, work, 1, work, 1, work[iwork:]) 432 433 // If right singular vectors desired in VT, copy them there. 434 if wantvas { 435 impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt) 436 } 437 } else if wantuo && wantvn { 438 // Path 2 439 panic(noSVDO) 440 } else if wantuo && wantvas { 441 // Path 3 442 panic(noSVDO) 443 } else if wantus { 444 if wantvn { 445 // Path 4 446 if lwork >= n*n+max(4*n, bdspac) { 447 // Sufficient workspace for a fast algorithm. 448 ir := 0 449 var ldworkr int 450 if lwork >= wrkbl+lda*n { 451 ldworkr = lda 452 } else { 453 ldworkr = n 454 } 455 itau := ir + ldworkr*n 456 iwork := itau + n 457 // Compute A = Q * R. 458 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 459 460 // Copy R to work[ir:], zeroing out below it. 461 impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr) 462 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr) 463 464 // Generate Q in A. 465 impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 466 ie := itau 467 itauq := ie + n 468 itaup := itauq + n 469 iwork = itaup + n 470 471 // Bidiagonalize R in work[ir:]. 472 impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:], 473 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 474 475 // Generate left vectors bidiagonalizing R in work[ir:]. 476 impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr, 477 work[itauq:], work[iwork:], lwork-iwork) 478 iwork = ie + n 479 480 // Perform bidiagonal QR iteration, computing left singular 481 // vectors of R in work[ir:]. 482 ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1, 483 work[ir:], ldworkr, work, 1, work[iwork:]) 484 485 // Multiply Q in A by left singular vectors of R in 486 // work[ir:], storing result in U. 487 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda, 488 work[ir:], ldworkr, 0, u, ldu) 489 } else { 490 // Insufficient workspace for a fast algorithm. 491 itau := 0 492 iwork := itau + n 493 494 // Compute A = Q*R, copying result to U. 495 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 496 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 497 498 // Generate Q in U. 499 impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 500 ie := itau 501 itauq := ie + n 502 itaup := itauq + n 503 iwork = itaup + n 504 505 // Zero out below R in A. 506 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 507 508 // Bidiagonalize R in A. 509 impl.Dgebrd(n, n, a, lda, s, work[ie:], 510 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 511 512 // Multiply Q in U by left vectors bidiagonalizing R. 513 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 514 a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 515 iwork = ie + n 516 517 // Perform bidiagonal QR iteration, computing left 518 // singular vectors of A in U. 519 ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1, 520 u, ldu, work, 1, work[iwork:]) 521 } 522 } else if wantvo { 523 // Path 5 524 panic(noSVDO) 525 } else if wantvas { 526 // Path 6 527 if lwork >= n*n+max(4*n, bdspac) { 528 // Sufficient workspace for a fast algorithm. 529 iu := 0 530 var ldworku int 531 if lwork >= wrkbl+lda*n { 532 ldworku = lda 533 } else { 534 ldworku = n 535 } 536 itau := iu + ldworku*n 537 iwork := itau + n 538 539 // Compute A = Q * R. 540 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 541 // Copy R to work[iu:], zeroing out below it. 542 impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku) 543 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku) 544 545 // Generate Q in A. 546 impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 547 548 ie := itau 549 itauq := ie + n 550 itaup := itauq + n 551 iwork = itaup + n 552 553 // Bidiagonalize R in work[iu:], copying result to VT. 554 impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:], 555 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 556 impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt) 557 558 // Generate left bidiagonalizing vectors in work[iu:]. 559 impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku, 560 work[itauq:], work[iwork:], lwork-iwork) 561 562 // Generate right bidiagonalizing vectors in VT. 563 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 564 work[itaup:], work[iwork:], lwork-iwork) 565 iwork = ie + n 566 567 // Perform bidiagonal QR iteration, computing left singular 568 // vectors of R in work[iu:], and computing right singular 569 // vectors of R in VT. 570 ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:], 571 vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:]) 572 573 // Multiply Q in A by left singular vectors of R in 574 // work[iu:], storing result in U. 575 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda, 576 work[iu:], ldworku, 0, u, ldu) 577 } else { 578 // Insufficient workspace for a fast algorithm. 579 itau := 0 580 iwork := itau + n 581 582 // Compute A = Q * R, copying result to U. 583 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 584 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 585 586 // Generate Q in U. 587 impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 588 589 // Copy R to VT, zeroing out below it. 590 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 591 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt) 592 593 ie := itau 594 itauq := ie + n 595 itaup := itauq + n 596 iwork = itaup + n 597 598 // Bidiagonalize R in VT. 599 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 600 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 601 602 // Multiply Q in U by left bidiagonalizing vectors in VT. 603 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 604 vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 605 606 // Generate right bidiagonalizing vectors in VT. 607 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 608 work[itaup:], work[iwork:], lwork-iwork) 609 iwork = ie + n 610 611 // Perform bidiagonal QR iteration, computing left singular 612 // vectors of A in U and computing right singular vectors 613 // of A in VT. 614 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 615 vt, ldvt, u, ldu, work, 1, work[iwork:]) 616 } 617 } 618 } else if wantua { 619 if wantvn { 620 // Path 7 621 if lwork >= n*n+max(max(n+m, 4*n), bdspac) { 622 // Sufficient workspace for a fast algorithm. 623 ir := 0 624 var ldworkr int 625 if lwork >= wrkbl+lda*n { 626 ldworkr = lda 627 } else { 628 ldworkr = n 629 } 630 itau := ir + ldworkr*n 631 iwork := itau + n 632 633 // Compute A = Q*R, copying result to U. 634 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 635 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 636 637 // Copy R to work[ir:], zeroing out below it. 638 impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr) 639 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr) 640 641 // Generate Q in U. 642 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 643 ie := itau 644 itauq := ie + n 645 itaup := itauq + n 646 iwork = itaup + n 647 648 // Bidiagonalize R in work[ir:]. 649 impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:], 650 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 651 652 // Generate left bidiagonalizing vectors in work[ir:]. 653 impl.Dorgbr(lapack.GenerateQ, n, n, n, work[ir:], ldworkr, 654 work[itauq:], work[iwork:], lwork-iwork) 655 iwork = ie + n 656 657 // Perform bidiagonal QR iteration, computing left singular 658 // vectors of R in work[ir:]. 659 ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1, 660 work[ir:], ldworkr, work, 1, work[iwork:]) 661 662 // Multiply Q in U by left singular vectors of R in 663 // work[ir:], storing result in A. 664 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu, 665 work[ir:], ldworkr, 0, a, lda) 666 667 // Copy left singular vectors of A from A to U. 668 impl.Dlacpy(blas.All, m, n, a, lda, u, ldu) 669 } else { 670 // Insufficient workspace for a fast algorithm. 671 itau := 0 672 iwork := itau + n 673 674 // Compute A = Q*R, copying result to U. 675 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 676 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 677 678 // Generate Q in U. 679 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 680 ie := itau 681 itauq := ie + n 682 itaup := itauq + n 683 iwork = itaup + n 684 685 // Zero out below R in A. 686 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 687 688 // Bidiagonalize R in A. 689 impl.Dgebrd(n, n, a, lda, s, work[ie:], 690 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 691 692 // Multiply Q in U by left bidiagonalizing vectors in A. 693 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 694 a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 695 iwork = ie + n 696 697 // Perform bidiagonal QR iteration, computing left 698 // singular vectors of A in U. 699 ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], 700 work, 1, u, ldu, work, 1, work[iwork:]) 701 } 702 } else if wantvo { 703 // Path 8. 704 panic(noSVDO) 705 } else if wantvas { 706 // Path 9. 707 if lwork >= n*n+max(max(n+m, 4*n), bdspac) { 708 // Sufficient workspace for a fast algorithm. 709 iu := 0 710 var ldworku int 711 if lwork >= wrkbl+lda*n { 712 ldworku = lda 713 } else { 714 ldworku = n 715 } 716 itau := iu + ldworku*n 717 iwork := itau + n 718 719 // Compute A = Q * R, copying result to U. 720 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 721 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 722 723 // Generate Q in U. 724 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 725 726 // Copy R to work[iu:], zeroing out below it. 727 impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku) 728 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku) 729 730 ie = itau 731 itauq := ie + n 732 itaup := itauq + n 733 iwork = itaup + n 734 735 // Bidiagonalize R in work[iu:], copying result to VT. 736 impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:], 737 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 738 impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt) 739 740 // Generate left bidiagonalizing vectors in work[iu:]. 741 impl.Dorgbr(lapack.GenerateQ, n, n, n, work[iu:], ldworku, 742 work[itauq:], work[iwork:], lwork-iwork) 743 744 // Generate right bidiagonalizing vectors in VT. 745 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 746 work[itaup:], work[iwork:], lwork-iwork) 747 iwork = ie + n 748 749 // Perform bidiagonal QR iteration, computing left singular 750 // vectors of R in work[iu:] and computing right 751 // singular vectors of R in VT. 752 ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:], 753 vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:]) 754 755 // Multiply Q in U by left singular vectors of R in 756 // work[iu:], storing result in A. 757 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, 758 u, ldu, work[iu:], ldworku, 0, a, lda) 759 760 // Copy left singular vectors of A from A to U. 761 impl.Dlacpy(blas.All, m, n, a, lda, u, ldu) 762 763 /* 764 // Bidiagonalize R in VT. 765 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 766 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 767 768 // Multiply Q in U by left bidiagonalizing vectors in VT. 769 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, 770 m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 771 772 // Generate right bidiagonalizing vectors in VT. 773 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 774 work[itaup:], work[iwork:], lwork-iwork) 775 iwork = ie + n 776 777 // Perform bidiagonal QR iteration, computing left singular 778 // vectors of A in U and computing right singular vectors 779 // of A in VT. 780 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 781 vt, ldvt, u, ldu, work, 1, work[iwork:]) 782 */ 783 } else { 784 // Insufficient workspace for a fast algorithm. 785 itau := 0 786 iwork := itau + n 787 788 // Compute A = Q*R, copying result to U. 789 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 790 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 791 792 // Generate Q in U. 793 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 794 795 // Copy R from A to VT, zeroing out below it. 796 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 797 if n > 1 { 798 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt) 799 } 800 801 ie := itau 802 itauq := ie + n 803 itaup := itauq + n 804 iwork = itaup + n 805 806 // Bidiagonalize R in VT. 807 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 808 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 809 810 // Multiply Q in U by left bidiagonalizing vectors in VT. 811 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, 812 m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 813 814 // Generate right bidiagonizing vectors in VT. 815 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, 816 work[itaup:], work[iwork:], lwork-iwork) 817 iwork = ie + n 818 819 // Perform bidiagonal QR iteration, computing left singular 820 // vectors of A in U and computing right singular vectors 821 // of A in VT. 822 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 823 vt, ldvt, u, ldu, work, 1, work[iwork:]) 824 } 825 } 826 } 827 } else { 828 // Path 10. 829 // M at least N, but not much larger. 830 ie = 0 831 itauq := ie + n 832 itaup := itauq + n 833 iwork := itaup + n 834 835 // Bidiagonalize A. 836 impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], 837 work[itaup:], work[iwork:], lwork-iwork) 838 if wantuas { 839 // Left singular vectors are desired in U. Copy result to U and 840 // generate left biadiagonalizing vectors in U. 841 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 842 var ncu int 843 if wantus { 844 ncu = n 845 } 846 if wantua { 847 ncu = m 848 } 849 impl.Dorgbr(lapack.GenerateQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 850 } 851 if wantvas { 852 // Right singular vectors are desired in VT. Copy result to VT and 853 // generate left biadiagonalizing vectors in VT. 854 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 855 impl.Dorgbr(lapack.GeneratePT, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork) 856 } 857 if wantuo { 858 panic(noSVDO) 859 } 860 if wantvo { 861 panic(noSVDO) 862 } 863 iwork = ie + n 864 var nru, ncvt int 865 if wantuas || wantuo { 866 nru = m 867 } 868 if wantun { 869 nru = 0 870 } 871 if wantvas || wantvo { 872 ncvt = n 873 } 874 if wantvn { 875 ncvt = 0 876 } 877 if !wantuo && !wantvo { 878 // Perform bidiagonal QR iteration, if desired, computing left 879 // singular vectors in U and right singular vectors in VT. 880 ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:], 881 vt, ldvt, u, ldu, work, 1, work[iwork:]) 882 } else { 883 // There will be two branches when the implementation is complete. 884 panic(noSVDO) 885 } 886 } 887 } else { 888 // A has more columns than rows. If A has sufficiently more columns than 889 // rows, first reduce using the LQ decomposition. 890 if n >= mnthr { 891 // n >> m. 892 if wantvn { 893 // Path 1t. 894 itau := 0 895 iwork := itau + m 896 897 // Compute A = L*Q. 898 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 899 900 // Zero out above L. 901 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 902 ie := 0 903 itauq := ie + m 904 itaup := itauq + m 905 iwork = itaup + m 906 907 // Bidiagonalize L in A. 908 impl.Dgebrd(m, m, a, lda, s, work[ie:itauq], 909 work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork) 910 if wantuo || wantuas { 911 impl.Dorgbr(lapack.GenerateQ, m, m, m, a, lda, 912 work[itauq:], work[iwork:], lwork-iwork) 913 } 914 iwork = ie + m 915 nru := 0 916 if wantuo || wantuas { 917 nru = m 918 } 919 920 // Perform bidiagonal QR iteration, computing left singular vectors 921 // of A in A if desired. 922 ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:], 923 work, 1, a, lda, work, 1, work[iwork:]) 924 925 // If left singular vectors desired in U, copy them there. 926 if wantuas { 927 impl.Dlacpy(blas.All, m, m, a, lda, u, ldu) 928 } 929 } else if wantvo && wantun { 930 // Path 2t. 931 panic(noSVDO) 932 } else if wantvo && wantuas { 933 // Path 3t. 934 panic(noSVDO) 935 } else if wantvs { 936 if wantun { 937 // Path 4t. 938 if lwork >= m*m+max(4*m, bdspac) { 939 // Sufficient workspace for a fast algorithm. 940 ir := 0 941 var ldworkr int 942 if lwork >= wrkbl+lda*m { 943 ldworkr = lda 944 } else { 945 ldworkr = m 946 } 947 itau := ir + ldworkr*m 948 iwork := itau + m 949 950 // Compute A = L*Q. 951 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 952 953 // Copy L to work[ir:], zeroing out above it. 954 impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr) 955 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr) 956 957 // Generate Q in A. 958 impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork) 959 ie := itau 960 itauq := ie + m 961 itaup := itauq + m 962 iwork = itaup + m 963 964 // Bidiagonalize L in work[ir:]. 965 impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:], 966 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 967 968 // Generate right vectors bidiagonalizing L in work[ir:]. 969 impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr, 970 work[itaup:], work[iwork:], lwork-iwork) 971 iwork = ie + m 972 973 // Perform bidiagonal QR iteration, computing right singular 974 // vectors of L in work[ir:]. 975 ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:], 976 work[ir:], ldworkr, work, 1, work, 1, work[iwork:]) 977 978 // Multiply right singular vectors of L in work[ir:] by 979 // Q in A, storing result in VT. 980 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 981 work[ir:], ldworkr, a, lda, 0, vt, ldvt) 982 } else { 983 // Insufficient workspace for a fast algorithm. 984 itau := 0 985 iwork := itau + m 986 987 // Compute A = L*Q. 988 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 989 990 // Copy result to VT. 991 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 992 993 // Generate Q in VT. 994 impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 995 ie := itau 996 itauq := ie + m 997 itaup := itauq + m 998 iwork = itaup + m 999 1000 // Zero out above L in A. 1001 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 1002 1003 // Bidiagonalize L in A. 1004 impl.Dgebrd(m, m, a, lda, s, work[ie:], 1005 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1006 1007 // Multiply right vectors bidiagonalizing L by Q in VT. 1008 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1009 a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1010 iwork = ie + m 1011 1012 // Perform bidiagonal QR iteration, computing right 1013 // singular vectors of A in VT. 1014 ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:], 1015 vt, ldvt, work, 1, work, 1, work[iwork:]) 1016 } 1017 } else if wantuo { 1018 // Path 5t. 1019 panic(noSVDO) 1020 } else if wantuas { 1021 // Path 6t. 1022 if lwork >= m*m+max(4*m, bdspac) { 1023 // Sufficient workspace for a fast algorithm. 1024 iu := 0 1025 var ldworku int 1026 if lwork >= wrkbl+lda*m { 1027 ldworku = lda 1028 } else { 1029 ldworku = m 1030 } 1031 itau := iu + ldworku*m 1032 iwork := itau + m 1033 1034 // Compute A = L*Q. 1035 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1036 1037 // Copy L to work[iu:], zeroing out above it. 1038 impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku) 1039 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku) 1040 1041 // Generate Q in A. 1042 impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork) 1043 ie := itau 1044 itauq := ie + m 1045 itaup := itauq + m 1046 iwork = itaup + m 1047 1048 // Bidiagonalize L in work[iu:], copying result to U. 1049 impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], 1050 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1051 impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu) 1052 1053 // Generate right bidiagionalizing vectors in work[iu:]. 1054 impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku, 1055 work[itaup:], work[iwork:], lwork-iwork) 1056 1057 // Generate left bidiagonalizing vectors in U. 1058 impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1059 iwork = ie + m 1060 1061 // Perform bidiagonal QR iteration, computing left singular 1062 // vectors of L in U and computing right singular vectors of 1063 // L in work[iu:]. 1064 ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:], 1065 work[iu:], ldworku, u, ldu, work, 1, work[iwork:]) 1066 1067 // Multiply right singular vectors of L in work[iu:] by 1068 // Q in A, storing result in VT. 1069 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1070 work[iu:], ldworku, a, lda, 0, vt, ldvt) 1071 } else { 1072 // Insufficient workspace for a fast algorithm. 1073 itau := 0 1074 iwork := itau + m 1075 1076 // Compute A = L*Q, copying result to VT. 1077 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1078 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1079 1080 // Generate Q in VT. 1081 impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1082 1083 // Copy L to U, zeroing out above it. 1084 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1085 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu) 1086 1087 ie := itau 1088 itauq := ie + m 1089 itaup := itauq + m 1090 iwork = itaup + m 1091 1092 // Bidiagonalize L in U. 1093 impl.Dgebrd(m, m, u, ldu, s, work[ie:], 1094 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1095 1096 // Multiply right bidiagonalizing vectors in U by Q in VT. 1097 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1098 u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1099 1100 // Generate left bidiagonalizing vectors in U. 1101 impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1102 iwork = ie + m 1103 1104 // Perform bidiagonal QR iteration, computing left singular 1105 // vectors of A in U and computing right singular vectors 1106 // of A in VT. 1107 ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt, 1108 u, ldu, work, 1, work[iwork:]) 1109 } 1110 } 1111 } else if wantva { 1112 if wantun { 1113 // Path 7t. 1114 if lwork >= m*m+max(max(n+m, 4*m), bdspac) { 1115 // Sufficient workspace for a fast algorithm. 1116 ir := 0 1117 var ldworkr int 1118 if lwork >= wrkbl+lda*m { 1119 ldworkr = lda 1120 } else { 1121 ldworkr = m 1122 } 1123 itau := ir + ldworkr*m 1124 iwork := itau + m 1125 1126 // Compute A = L*Q, copying result to VT. 1127 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1128 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1129 1130 // Copy L to work[ir:], zeroing out above it. 1131 impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr) 1132 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr) 1133 1134 // Generate Q in VT. 1135 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1136 1137 ie := itau 1138 itauq := ie + m 1139 itaup := itauq + m 1140 iwork = itaup + m 1141 1142 // Bidiagonalize L in work[ir:]. 1143 impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:], 1144 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1145 1146 // Generate right bidiagonalizing vectors in work[ir:]. 1147 impl.Dorgbr(lapack.GeneratePT, m, m, m, work[ir:], ldworkr, 1148 work[itaup:], work[iwork:], lwork-iwork) 1149 iwork = ie + m 1150 1151 // Perform bidiagonal QR iteration, computing right 1152 // singular vectors of L in work[ir:]. 1153 ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:], 1154 work[ir:], ldworkr, work, 1, work, 1, work[iwork:]) 1155 1156 // Multiply right singular vectors of L in work[ir:] by 1157 // Q in VT, storing result in A. 1158 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1159 work[ir:], ldworkr, vt, ldvt, 0, a, lda) 1160 1161 // Copy right singular vectors of A from A to VT. 1162 impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt) 1163 } else { 1164 // Insufficient workspace for a fast algorithm. 1165 itau := 0 1166 iwork := itau + m 1167 // Compute A = L * Q, copying result to VT. 1168 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1169 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1170 1171 // Generate Q in VT. 1172 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1173 1174 ie := itau 1175 itauq := ie + m 1176 itaup := itauq + m 1177 iwork = itaup + m 1178 1179 // Zero out above L in A. 1180 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 1181 1182 // Bidiagonalize L in A. 1183 impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:], 1184 work[itaup:], work[iwork:], lwork-iwork) 1185 1186 // Multiply right bidiagonalizing vectors in A by Q in VT. 1187 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1188 a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1189 iwork = ie + m 1190 1191 // Perform bidiagonal QR iteration, computing right singular 1192 // vectors of A in VT. 1193 ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:], 1194 vt, ldvt, work, 1, work, 1, work[iwork:]) 1195 } 1196 } else if wantuo { 1197 panic(noSVDO) 1198 } else if wantuas { 1199 // Path 9t. 1200 if lwork >= m*m+max(max(m+n, 4*m), bdspac) { 1201 // Sufficient workspace for a fast algorithm. 1202 iu := 0 1203 1204 var ldworku int 1205 if lwork >= wrkbl+lda*m { 1206 ldworku = lda 1207 } else { 1208 ldworku = m 1209 } 1210 itau := iu + ldworku*m 1211 iwork := itau + m 1212 1213 // Generate A = L * Q copying result to VT. 1214 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1215 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1216 1217 // Generate Q in VT. 1218 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1219 1220 // Copy L to work[iu:], zeroing out above it. 1221 impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku) 1222 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku) 1223 ie = itau 1224 itauq := ie + m 1225 itaup := itauq + m 1226 iwork = itaup + m 1227 1228 // Bidiagonalize L in work[iu:], copying result to U. 1229 impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], 1230 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1231 impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu) 1232 1233 // Generate right bidiagonalizing vectors in work[iu:]. 1234 impl.Dorgbr(lapack.GeneratePT, m, m, m, work[iu:], ldworku, 1235 work[itaup:], work[iwork:], lwork-iwork) 1236 1237 // Generate left bidiagonalizing vectors in U. 1238 impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1239 iwork = ie + m 1240 1241 // Perform bidiagonal QR iteration, computing left singular 1242 // vectors of L in U and computing right singular vectors 1243 // of L in work[iu:]. 1244 ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:], 1245 work[iu:], ldworku, u, ldu, work, 1, work[iwork:]) 1246 1247 // Multiply right singular vectors of L in work[iu:] 1248 // Q in VT, storing result in A. 1249 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1250 work[iu:], ldworku, vt, ldvt, 0, a, lda) 1251 1252 // Copy right singular vectors of A from A to VT. 1253 impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt) 1254 } else { 1255 // Insufficient workspace for a fast algorithm. 1256 itau := 0 1257 iwork := itau + m 1258 1259 // Compute A = L * Q, copying result to VT. 1260 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1261 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1262 1263 // Generate Q in VT. 1264 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1265 1266 // Copy L to U, zeroing out above it. 1267 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1268 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu) 1269 1270 ie = itau 1271 itauq := ie + m 1272 itaup := itauq + m 1273 iwork = itaup + m 1274 1275 // Bidiagonalize L in U. 1276 impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:], 1277 work[itaup:], work[iwork:], lwork-iwork) 1278 1279 // Multiply right bidiagonalizing vectors in U by Q in VT. 1280 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1281 u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1282 1283 // Generate left bidiagonalizing vectors in U. 1284 impl.Dorgbr(lapack.GenerateQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1285 iwork = ie + m 1286 1287 // Perform bidiagonal QR iteration, computing left singular 1288 // vectors of A in U and computing right singular vectors 1289 // of A in VT. 1290 ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], 1291 vt, ldvt, u, ldu, work, 1, work[iwork:]) 1292 } 1293 } 1294 } 1295 } else { 1296 // Path 10t. 1297 // N at least M, but not much larger. 1298 ie = 0 1299 itauq := ie + m 1300 itaup := itauq + m 1301 iwork := itaup + m 1302 1303 // Bidiagonalize A. 1304 impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1305 if wantuas { 1306 // If left singular vectors desired in U, copy result to U and 1307 // generate left bidiagonalizing vectors in U. 1308 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1309 impl.Dorgbr(lapack.GenerateQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1310 } 1311 if wantvas { 1312 // If right singular vectors desired in VT, copy result to VT 1313 // and generate right bidiagonalizing vectors in VT. 1314 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1315 var nrvt int 1316 if wantva { 1317 nrvt = n 1318 } else { 1319 nrvt = m 1320 } 1321 impl.Dorgbr(lapack.GeneratePT, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork) 1322 } 1323 if wantuo { 1324 panic(noSVDO) 1325 } 1326 if wantvo { 1327 panic(noSVDO) 1328 } 1329 iwork = ie + m 1330 var nru, ncvt int 1331 if wantuas || wantuo { 1332 nru = m 1333 } 1334 if wantvas || wantvo { 1335 ncvt = n 1336 } 1337 if !wantuo && !wantvo { 1338 // Perform bidiagonal QR iteration, if desired, computing left 1339 // singular vectors in U and computing right singular vectors in 1340 // VT. 1341 ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:], 1342 vt, ldvt, u, ldu, work, 1, work[iwork:]) 1343 } else { 1344 // There will be two branches when the implementation is complete. 1345 panic(noSVDO) 1346 } 1347 } 1348 } 1349 if !ok { 1350 if ie > 1 { 1351 for i := 0; i < minmn-1; i++ { 1352 work[i+1] = work[i+ie] 1353 } 1354 } 1355 if ie < 1 { 1356 for i := minmn - 2; i >= 0; i-- { 1357 work[i+1] = work[i+ie] 1358 } 1359 } 1360 } 1361 // Undo scaling if necessary. 1362 if iscl { 1363 if anrm > bignum { 1364 impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn, s, minmn) 1365 } 1366 if !ok && anrm > bignum { 1367 impl.Dlascl(lapack.General, 0, 0, bignum, anrm, 1, minmn-1, work[1:], minmn) 1368 } 1369 if anrm < smlnum { 1370 impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn, s, minmn) 1371 } 1372 if !ok && anrm < smlnum { 1373 impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, 1, minmn-1, work[1:], minmn) 1374 } 1375 } 1376 work[0] = float64(maxwrk) 1377 return ok 1378 }