github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dgesvd.go (about) 1 // Copyright ©2015 The gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package native 6 7 import ( 8 "math" 9 10 "github.com/gonum/blas" 11 "github.com/gonum/blas/blas64" 12 "github.com/gonum/lapack" 13 ) 14 15 const noSVDO = "dgesvd: not coded for overwrite" 16 17 // Dgesvd computes the singular value decomposition of the input matrix A. 18 // 19 // The singular value decomposition is 20 // A = U * Sigma * V^T 21 // where Sigma is an m×n diagonal matrix containing the singular values of A, 22 // U is an m×m orthogonal matrix and V is an n×n orthogonal matrix. The first 23 // min(m,n) columns of U and V are the left and right singular vectors of A 24 // respectively. 25 // 26 // jobU and jobVT are options for computing the singular vectors. The behavior 27 // is as follows 28 // jobU == lapack.SVDAll All m columns of U are returned in u 29 // jobU == lapack.SVDInPlace The first min(m,n) columns are returned in u 30 // jobU == lapack.SVDOverwrite The first min(m,n) columns of U are written into a 31 // jobU == lapack.SVDNone The columns of U are not computed. 32 // The behavior is the same for jobVT and the rows of V^T. At most one of jobU 33 // and jobVT can equal lapack.SVDOverwrite, and Dgesvd will panic otherwise. 34 // 35 // On entry, a contains the data for the m×n matrix A. During the call to Dgesvd 36 // the data is overwritten. On exit, A contains the appropriate singular vectors 37 // if either job is lapack.SVDOverwrite. 38 // 39 // s is a slice of length at least min(m,n) and on exit contains the singular 40 // values in decreasing order. 41 // 42 // u contains the left singular vectors on exit, stored column-wise. If 43 // jobU == lapack.SVDAll, u is of size m×m. If jobU == lapack.SVDInPlace u is 44 // of size m×min(m,n). If jobU == lapack.SVDOverwrite or lapack.SVDNone, u is 45 // not used. 46 // 47 // vt contains the left singular vectors on exit, stored row-wise. If 48 // jobV == lapack.SVDAll, vt is of size n×m. If jobVT == lapack.SVDInPlace vt is 49 // of size min(m,n)×n. If jobVT == lapack.SVDOverwrite or lapack.SVDNone, vt is 50 // not used. 51 // 52 // work is a slice for storing temporary memory, and lwork is the usable size of 53 // the slice. lwork must be at least max(5*min(m,n), 3*min(m,n)+max(m,n)). 54 // If lwork == -1, instead of performing Dgesvd, the optimal work length will be 55 // stored into work[0]. Dgesvd will panic if the working memory has insufficient 56 // storage. 57 // 58 // Dgesvd returns whether the decomposition successfully completed. 59 func (impl Implementation) Dgesvd(jobU, jobVT lapack.SVDJob, m, n int, a []float64, lda int, s, u []float64, ldu int, vt []float64, ldvt int, work []float64, lwork int) (ok bool) { 60 minmn := min(m, n) 61 checkMatrix(m, n, a, lda) 62 if jobU == lapack.SVDAll { 63 checkMatrix(m, m, u, ldu) 64 } else if jobU == lapack.SVDInPlace { 65 checkMatrix(m, minmn, u, ldu) 66 } 67 if jobVT == lapack.SVDAll { 68 checkMatrix(n, n, vt, ldvt) 69 } else if jobVT == lapack.SVDInPlace { 70 checkMatrix(minmn, n, vt, ldvt) 71 } 72 if jobU == lapack.SVDOverwrite && jobVT == lapack.SVDOverwrite { 73 panic("lapack: both jobU and jobVT are lapack.SVDOverwrite") 74 } 75 if len(s) < minmn { 76 panic(badS) 77 } 78 if jobU == lapack.SVDOverwrite || jobVT == lapack.SVDOverwrite { 79 panic(noSVDO) 80 } 81 if m == 0 || n == 0 { 82 return true 83 } 84 85 wantua := jobU == lapack.SVDAll 86 wantus := jobU == lapack.SVDInPlace 87 wantuas := wantua || wantus 88 wantuo := jobU == lapack.SVDOverwrite 89 wantun := jobU == lapack.None 90 91 wantva := jobVT == lapack.SVDAll 92 wantvs := jobVT == lapack.SVDInPlace 93 wantvas := wantva || wantvs 94 wantvo := jobVT == lapack.SVDOverwrite 95 wantvn := jobVT == lapack.None 96 97 bi := blas64.Implementation() 98 var mnthr int 99 100 // Compute optimal space for subroutines. 101 maxwrk := 1 102 opts := string(jobU) + string(jobVT) 103 var wrkbl, bdspac int 104 if m >= n { 105 mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0) 106 bdspac = 5 * n 107 impl.Dgeqrf(m, n, a, lda, nil, work, -1) 108 lwork_dgeqrf := int(work[0]) 109 impl.Dorgqr(m, n, n, a, lda, nil, work, -1) 110 lwork_dorgqr_n := int(work[0]) 111 impl.Dorgqr(m, m, n, a, lda, nil, work, -1) 112 lwork_dorgqr_m := int(work[0]) 113 impl.Dgebrd(n, n, a, lda, s, nil, nil, nil, work, -1) 114 lwork_dgebrd := int(work[0]) 115 impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, nil, work, -1) 116 lwork_dorgbr_p := int(work[0]) 117 impl.Dorgbr(lapack.ApplyQ, n, n, n, a, lda, nil, work, -1) 118 lwork_dorgbr_q := int(work[0]) 119 120 if m >= mnthr { 121 // m >> n 122 if wantun { 123 // Path 1 124 maxwrk = n + lwork_dgeqrf 125 maxwrk = max(maxwrk, 3*n+lwork_dgebrd) 126 if wantvo || wantvas { 127 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p) 128 } 129 maxwrk = max(maxwrk, bdspac) 130 } else if wantuo && wantvn { 131 // Path 2 132 wrkbl = n + lwork_dgeqrf 133 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 134 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 135 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 136 wrkbl = max(wrkbl, bdspac) 137 maxwrk = max(n*n+wrkbl, n*n+m*n+n) 138 } else if wantuo && wantvs { 139 // Path 3 140 wrkbl = n + lwork_dgeqrf 141 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 142 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 143 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 144 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 145 wrkbl = max(wrkbl, bdspac) 146 maxwrk = max(n*n+wrkbl, n*n+m*n+n) 147 } else if wantus && wantvn { 148 // Path 4 149 wrkbl = n + lwork_dgeqrf 150 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 151 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 152 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 153 wrkbl = max(wrkbl, bdspac) 154 maxwrk = n*n + wrkbl 155 } else if wantus && wantvo { 156 // Path 5 157 wrkbl = n + lwork_dgeqrf 158 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 159 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 160 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 161 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 162 wrkbl = max(wrkbl, bdspac) 163 maxwrk = 2*n*n + wrkbl 164 } else if wantus && wantvas { 165 // Path 6 166 wrkbl = n + lwork_dgeqrf 167 wrkbl = max(wrkbl, n+lwork_dorgqr_n) 168 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 169 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 170 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 171 wrkbl = max(wrkbl, bdspac) 172 maxwrk = n*n + wrkbl 173 } else if wantua && wantvn { 174 // Path 7 175 wrkbl = n + lwork_dgeqrf 176 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 177 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 178 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 179 wrkbl = max(wrkbl, bdspac) 180 maxwrk = n*n + wrkbl 181 } else if wantua && wantvo { 182 // Path 8 183 wrkbl = n + lwork_dgeqrf 184 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 185 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 186 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 187 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 188 wrkbl = max(wrkbl, bdspac) 189 maxwrk = 2*n*n + wrkbl 190 } else if wantua && wantvas { 191 // Path 9 192 wrkbl = n + lwork_dgeqrf 193 wrkbl = max(wrkbl, n+lwork_dorgqr_m) 194 wrkbl = max(wrkbl, 3*n+lwork_dgebrd) 195 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_q) 196 wrkbl = max(wrkbl, 3*n+lwork_dorgbr_p) 197 wrkbl = max(wrkbl, bdspac) 198 maxwrk = n*n + wrkbl 199 } 200 } else { 201 // Path 10: m > n 202 impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1) 203 lwork_dgebrd := int(work[0]) 204 maxwrk = 3*n + lwork_dgebrd 205 if wantus || wantuo { 206 impl.Dorgbr(lapack.ApplyQ, m, n, n, a, lda, nil, work, -1) 207 lwork_dorgbr_q = int(work[0]) 208 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q) 209 } 210 if wantua { 211 impl.Dorgbr(lapack.ApplyQ, m, m, n, a, lda, nil, work, -1) 212 lwork_dorgbr_q := int(work[0]) 213 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_q) 214 } 215 if !wantvn { 216 maxwrk = max(maxwrk, 3*n+lwork_dorgbr_p) 217 } 218 maxwrk = max(maxwrk, bdspac) 219 } 220 } else { 221 mnthr = impl.Ilaenv(6, "DGESVD", opts, m, n, 0, 0) 222 223 bdspac = 5 * m 224 impl.Dgelqf(m, n, a, lda, nil, work, -1) 225 lwork_dgelqf := int(work[0]) 226 impl.Dorglq(n, n, m, nil, n, nil, work, -1) 227 lwork_dorglq_n := int(work[0]) 228 impl.Dorglq(m, n, m, a, lda, nil, work, -1) 229 lwork_dorglq_m := int(work[0]) 230 impl.Dgebrd(m, m, a, lda, s, nil, nil, nil, work, -1) 231 lwork_dgebrd := int(work[0]) 232 impl.Dorgbr(lapack.ApplyP, m, m, m, a, n, nil, work, -1) 233 lwork_dorgbr_p := int(work[0]) 234 impl.Dorgbr(lapack.ApplyQ, m, m, m, a, n, nil, work, -1) 235 lwork_dorgbr_q := int(work[0]) 236 if n >= mnthr { 237 // n >> m 238 if wantvn { 239 // Path 1t 240 maxwrk = m + lwork_dgelqf 241 maxwrk = max(maxwrk, 3*m+lwork_dgebrd) 242 if wantuo || wantuas { 243 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q) 244 } 245 maxwrk = max(maxwrk, bdspac) 246 } else if wantvo && wantun { 247 // Path 2t 248 wrkbl = m + lwork_dgelqf 249 wrkbl = max(wrkbl, m+lwork_dorglq_m) 250 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 251 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 252 wrkbl = max(wrkbl, bdspac) 253 maxwrk = max(m*m+wrkbl, m*m+m*n+m) 254 } else if wantvo && wantuas { 255 // Path 3t 256 wrkbl = m + lwork_dgelqf 257 wrkbl = max(wrkbl, m+lwork_dorglq_m) 258 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 259 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 260 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 261 wrkbl = max(wrkbl, bdspac) 262 maxwrk = max(m*m+wrkbl, m*m+m*n+m) 263 } else if wantvs && wantun { 264 // Path 4t 265 wrkbl = m + lwork_dgelqf 266 wrkbl = max(wrkbl, m+lwork_dorglq_m) 267 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 268 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 269 wrkbl = max(wrkbl, bdspac) 270 maxwrk = m*m + wrkbl 271 } else if wantvs && wantuo { 272 // Path 5t 273 wrkbl = m + lwork_dgelqf 274 wrkbl = max(wrkbl, m+lwork_dorglq_m) 275 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 276 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 277 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 278 wrkbl = max(wrkbl, bdspac) 279 maxwrk = 2*m*m + wrkbl 280 } else if wantvs && wantuas { 281 // Path 6t 282 wrkbl = m + lwork_dgelqf 283 wrkbl = max(wrkbl, m+lwork_dorglq_m) 284 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 285 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 286 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 287 wrkbl = max(wrkbl, bdspac) 288 maxwrk = m*m + wrkbl 289 } else if wantva && wantun { 290 // Path 7t 291 wrkbl = m + lwork_dgelqf 292 wrkbl = max(wrkbl, m+lwork_dorglq_n) 293 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 294 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 295 wrkbl = max(wrkbl, bdspac) 296 maxwrk = m*m + wrkbl 297 } else if wantva && wantuo { 298 // Path 8t 299 wrkbl = m + lwork_dgelqf 300 wrkbl = max(wrkbl, m+lwork_dorglq_n) 301 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 302 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 303 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 304 wrkbl = max(wrkbl, bdspac) 305 maxwrk = 2*m*m + wrkbl 306 } else if wantva && wantuas { 307 // Path 9t 308 wrkbl = m + lwork_dgelqf 309 wrkbl = max(wrkbl, m+lwork_dorglq_n) 310 wrkbl = max(wrkbl, 3*m+lwork_dgebrd) 311 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_p) 312 wrkbl = max(wrkbl, 3*m+lwork_dorgbr_q) 313 wrkbl = max(wrkbl, bdspac) 314 maxwrk = m*m + wrkbl 315 } 316 } else { 317 // Path 10t, n > m 318 impl.Dgebrd(m, n, a, lda, s, nil, nil, nil, work, -1) 319 lwork_dgebrd = int(work[0]) 320 maxwrk := 3*m + lwork_dgebrd 321 if wantvs || wantvo { 322 impl.Dorgbr(lapack.ApplyP, m, n, m, a, n, nil, work, -1) 323 lwork_dorgbr_p = int(work[0]) 324 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p) 325 } 326 if wantva { 327 impl.Dorgbr(lapack.ApplyP, n, n, m, a, n, nil, work, -1) 328 lwork_dorgbr_p = int(work[0]) 329 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_p) 330 } 331 if !wantun { 332 maxwrk = max(maxwrk, 3*m+lwork_dorgbr_q) 333 } 334 maxwrk = max(maxwrk, bdspac) 335 } 336 } 337 338 minWork := max(1, 5*minmn) 339 if !((wantun && m >= mnthr) || (wantvn && n >= mnthr)) { 340 minWork = max(minWork, 3*minmn+max(m, n)) 341 } 342 343 if lwork != -1 { 344 if len(work) < lwork { 345 panic(badWork) 346 } 347 if lwork < minWork { 348 panic(badWork) 349 } 350 } 351 if m == 0 || n == 0 { 352 return true 353 } 354 355 maxwrk = max(maxwrk, minWork) 356 work[0] = float64(maxwrk) 357 if lwork == -1 { 358 return true 359 } 360 361 // Perform decomposition. 362 eps := dlamchE 363 smlnum := math.Sqrt(dlamchS) / eps 364 bignum := 1 / smlnum 365 366 // Scale A if max element outside range [smlnum, bignum]. 367 anrm := impl.Dlange(lapack.MaxAbs, m, n, a, lda, nil) 368 var iscl bool 369 if anrm > 0 && anrm < smlnum { 370 iscl = true 371 impl.Dlascl(lapack.General, 0, 0, anrm, smlnum, m, n, a, lda) 372 } else if anrm > bignum { 373 iscl = true 374 impl.Dlascl(lapack.General, 0, 0, anrm, bignum, m, n, a, lda) 375 } 376 377 var ie int 378 if m >= n { 379 // If A has sufficiently more rows than columns, use the QR decomposition. 380 if m >= mnthr { 381 // m >> n 382 if wantun { 383 // Path 1. 384 itau := 0 385 iwork := itau + n 386 387 // Compute A = Q * R. 388 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 389 390 // Zero out below R. 391 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 392 ie = 0 393 itauq := ie + n 394 itaup := itauq + n 395 iwork = itaup + n 396 // Bidiagonalize R in A. 397 impl.Dgebrd(n, n, a, lda, s, work[ie:], work[itauq:], 398 work[itaup:], work[iwork:], lwork-iwork) 399 ncvt := 0 400 if wantvo || wantvas { 401 // Generate P^T. 402 impl.Dorgbr(lapack.ApplyP, n, n, n, a, lda, work[itaup:], 403 work[iwork:], lwork-iwork) 404 ncvt = n 405 } 406 iwork = ie + n 407 408 // Perform bidiagonal QR iteration computing right singular vectors 409 // of A in A if desired. 410 ok = impl.Dbdsqr(blas.Upper, n, ncvt, 0, 0, s, work[ie:], 411 a, lda, work, 1, work, 1, work[iwork:]) 412 413 // If right singular vectors desired in VT, copy them there. 414 if wantvas { 415 impl.Dlacpy(blas.All, n, n, a, lda, vt, ldvt) 416 } 417 } else if wantuo && wantvn { 418 // Path 2 419 panic(noSVDO) 420 } else if wantuo && wantvas { 421 // Path 3 422 panic(noSVDO) 423 } else if wantus { 424 if wantvn { 425 // Path 4 426 if lwork >= n*n+max(4*n, bdspac) { 427 // Sufficient workspace for a fast algorithm. 428 ir := 0 429 var ldworkr int 430 if lwork >= wrkbl+lda*n { 431 ldworkr = lda 432 } else { 433 ldworkr = n 434 } 435 itau := ir + ldworkr*n 436 iwork := itau + n 437 // Compute A = Q * R. 438 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 439 440 // Copy R to work[ir:], zeroing out below it. 441 impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr) 442 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr) 443 444 // Generate Q in A. 445 impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 446 ie := itau 447 itauq := ie + n 448 itaup := itauq + n 449 iwork = itaup + n 450 451 // Bidiagonalize R in work[ir:]. 452 impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:], 453 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 454 455 // Generate left vectors bidiagonalizing R in work[ir:]. 456 impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr, 457 work[itauq:], work[iwork:], lwork-iwork) 458 iwork = ie + n 459 460 // Perform bidiagonal QR iteration, compuing left singular 461 // vectors of R in work[ir:]. 462 ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1, 463 work[ir:], ldworkr, work, 1, work[iwork:]) 464 465 // Multiply Q in A by left singular vectors of R in 466 // work[ir:], storing result in U. 467 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda, 468 work[ir:], ldworkr, 0, u, ldu) 469 } else { 470 // Insufficient workspace for a fast algorithm. 471 itau := 0 472 iwork := itau + n 473 474 // Compute A = Q*R, copying result to U. 475 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 476 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 477 478 // Generate Q in U. 479 impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 480 ie := itau 481 itauq := ie + n 482 itaup := itauq + n 483 iwork = itaup + n 484 485 // Zero out below R in A. 486 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 487 488 // Bidiagonalize R in A. 489 impl.Dgebrd(n, n, a, lda, s, work[ie:], 490 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 491 492 // Multiply Q in U by left vectors bidiagonalizing R. 493 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 494 a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 495 iwork = ie + n 496 497 // Perform bidiagonal QR iteration, computing left 498 // singular vectors of A in U. 499 ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], work, 1, 500 u, ldu, work, 1, work[iwork:]) 501 } 502 } else if wantvo { 503 // Path 5 504 panic(noSVDO) 505 } else if wantvas { 506 // Path 6 507 if lwork >= n*n+max(4*n, bdspac) { 508 // Sufficient workspace for a fast algorithm. 509 iu := 0 510 var ldworku int 511 if lwork >= wrkbl+lda*n { 512 ldworku = lda 513 } else { 514 ldworku = n 515 } 516 itau := iu + ldworku*n 517 iwork := itau + n 518 519 // Compute A = Q * R. 520 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 521 // Copy R to work[iu:], zeroing out below it. 522 impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku) 523 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku) 524 525 // Generate Q in A. 526 impl.Dorgqr(m, n, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 527 528 ie := itau 529 itauq := ie + n 530 itaup := itauq + n 531 iwork = itaup + n 532 533 // Bidiagonalize R in work[iu:], copying result to VT. 534 impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:], 535 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 536 impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt) 537 538 // Generate left bidiagonalizing vectors in work[iu:]. 539 impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku, 540 work[itauq:], work[iwork:], lwork-iwork) 541 542 // Generate right bidiagonalizing vectors in VT. 543 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, 544 work[itaup:], work[iwork:], lwork-iwork) 545 iwork = ie + n 546 547 // Perform bidiagonal QR iteration, computing left singular 548 // vectors of R in work[iu:], and computing right singular 549 // vectors of R in VT. 550 ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:], 551 vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:]) 552 553 // Multiply Q in A by left singular vectors of R in 554 // work[iu:], storing result in U. 555 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, a, lda, 556 work[iu:], ldworku, 0, u, ldu) 557 } else { 558 // Insufficient workspace for a fast algorithm. 559 itau := 0 560 iwork := itau + n 561 562 // Compute A = Q * R, copying result to U. 563 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 564 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 565 566 // Generate Q in U. 567 impl.Dorgqr(m, n, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 568 569 // Copy R to VT, zeroing out below it. 570 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 571 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt) 572 573 ie := itau 574 itauq := ie + n 575 itaup := itauq + n 576 iwork = itaup + n 577 578 // Bidiagonalize R in VT. 579 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 580 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 581 582 // Multiply Q in U by left bidiagonalizing vectors in VT. 583 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 584 vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 585 586 // Generate right bidiagonalizing vectors in VT. 587 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, 588 work[itaup:], work[iwork:], lwork-iwork) 589 iwork = ie + n 590 591 // Perform bidiagonal QR iteration, computing left singular 592 // vectors of A in U and computing right singular vectors 593 // of A in VT. 594 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 595 vt, ldvt, u, ldu, work, 1, work[iwork:]) 596 } 597 } 598 } else if wantua { 599 if wantvn { 600 // Path 7 601 if lwork >= n*n+max(max(n+m, 4*n), bdspac) { 602 // Sufficient workspace for a fast algorithm. 603 ir := 0 604 var ldworkr int 605 if lwork >= wrkbl+lda*n { 606 ldworkr = lda 607 } else { 608 ldworkr = n 609 } 610 itau := ir + ldworkr*n 611 iwork := itau + n 612 613 // Compute A = Q*R, copying result to U. 614 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 615 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 616 617 // Copy R to work[ir:], zeroing out below it. 618 impl.Dlacpy(blas.Upper, n, n, a, lda, work[ir:], ldworkr) 619 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[ir+ldworkr:], ldworkr) 620 621 // Generate Q in U. 622 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 623 ie := itau 624 itauq := ie + n 625 itaup := itauq + n 626 iwork = itaup + n 627 628 // Bidiagonalize R in work[ir:]. 629 impl.Dgebrd(n, n, work[ir:], ldworkr, s, work[ie:], 630 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 631 632 // Generate left bidiagonalizing vectors in work[ir:]. 633 impl.Dorgbr(lapack.ApplyQ, n, n, n, work[ir:], ldworkr, 634 work[itauq:], work[iwork:], lwork-iwork) 635 iwork = ie + n 636 637 // Perform bidiagonal QR iteration, computing left singular 638 // vectors of R in work[ir:]. 639 ok = impl.Dbdsqr(blas.Upper, n, 0, n, 0, s, work[ie:], work, 1, 640 work[ir:], ldworkr, work, 1, work[iwork:]) 641 642 // Multiply Q in U by left singular vectors of R in 643 // work[ir:], storing result in A. 644 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, u, ldu, 645 work[ir:], ldworkr, 0, a, lda) 646 647 // Copy left singular vectors of A from A to U. 648 impl.Dlacpy(blas.All, m, n, a, lda, u, ldu) 649 } else { 650 // Insufficient workspace for a fast algorithm. 651 itau := 0 652 iwork := itau + n 653 654 // Compute A = Q*R, copying result to U. 655 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 656 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 657 658 // Generate Q in U. 659 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 660 ie := itau 661 itauq := ie + n 662 itaup := itauq + n 663 iwork = itaup + n 664 665 // Zero out below R in A. 666 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, a[lda:], lda) 667 668 // Bidiagonalize R in A. 669 impl.Dgebrd(n, n, a, lda, s, work[ie:], 670 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 671 672 // Multiply Q in U by left bidiagonalizing vectors in A. 673 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, m, n, n, 674 a, lda, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 675 iwork = ie + n 676 677 // Perform bidiagonal QR iteration, computing left 678 // singular vectors of A in U. 679 ok = impl.Dbdsqr(blas.Upper, n, 0, m, 0, s, work[ie:], 680 work, 1, u, ldu, work, 1, work[iwork:]) 681 } 682 } else if wantvo { 683 // Path 8. 684 panic(noSVDO) 685 } else if wantvas { 686 // Path 9. 687 if lwork >= n*n+max(max(n+m, 4*n), bdspac) { 688 // Sufficient workspace for a fast algorithm. 689 iu := 0 690 var ldworku int 691 if lwork >= wrkbl+lda*n { 692 ldworku = lda 693 } else { 694 ldworku = n 695 } 696 itau := iu + ldworku*n 697 iwork := itau + n 698 699 // Compute A = Q * R, copying result to U. 700 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 701 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 702 703 // Generate Q in U. 704 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 705 706 // Copy R to work[iu:], zeroing out below it. 707 impl.Dlacpy(blas.Upper, n, n, a, lda, work[iu:], ldworku) 708 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, work[iu+ldworku:], ldworku) 709 710 ie = itau 711 itauq := ie + n 712 itaup := itauq + n 713 iwork = itaup + n 714 715 // Bidiagonalize R in work[iu:], copying result to VT. 716 impl.Dgebrd(n, n, work[iu:], ldworku, s, work[ie:], 717 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 718 impl.Dlacpy(blas.Upper, n, n, work[iu:], ldworku, vt, ldvt) 719 720 // Generate left bidiagonalizing vectors in work[iu:]. 721 impl.Dorgbr(lapack.ApplyQ, n, n, n, work[iu:], ldworku, 722 work[itauq:], work[iwork:], lwork-iwork) 723 724 // Generate right bidiagonalizing vectors in VT. 725 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, 726 work[itaup:], work[iwork:], lwork-iwork) 727 iwork = ie + n 728 729 // Perform bidiagonal QR iteration, computing left singular 730 // vectors of R in work[iu:] and computing right 731 // singular vectors of R in VT. 732 ok = impl.Dbdsqr(blas.Upper, n, n, n, 0, s, work[ie:], 733 vt, ldvt, work[iu:], ldworku, work, 1, work[iwork:]) 734 735 // Multiply Q in U by left singular vectors of R in 736 // work[iu:], storing result in A. 737 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, n, 1, 738 u, ldu, work[iu:], ldworku, 0, a, lda) 739 740 // Copy left singular vectors of A from A to U. 741 impl.Dlacpy(blas.All, m, n, a, lda, u, ldu) 742 743 /* 744 // Bidiagonalize R in VT. 745 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 746 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 747 748 // Multiply Q in U by left bidiagonalizing vectors in VT. 749 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, 750 m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 751 752 // Generate right bidiagonalizing vectors in VT. 753 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, 754 work[itaup:], work[iwork:], lwork-iwork) 755 iwork = ie + n 756 757 // Perform bidiagonal QR iteration, computing left singular 758 // vectors of A in U and computing right singular vectors 759 // of A in VT. 760 ok = impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 761 vt, ldvt, u, ldu, work, 1, work[iwork:]) 762 */ 763 } else { 764 // Insufficient workspace for a fast algorithm. 765 itau := 0 766 iwork := itau + n 767 768 // Compute A = Q*R, copying result to U. 769 impl.Dgeqrf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 770 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 771 772 // Generate Q in U. 773 impl.Dorgqr(m, m, n, u, ldu, work[itau:], work[iwork:], lwork-iwork) 774 775 // Copy R from A to VT, zeroing out below it. 776 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 777 impl.Dlaset(blas.Lower, n-1, n-1, 0, 0, vt[ldvt:], ldvt) 778 779 ie := itau 780 itauq := ie + n 781 itaup := itauq + n 782 iwork = itaup + n 783 784 // Bidiagonalize R in VT. 785 impl.Dgebrd(n, n, vt, ldvt, s, work[ie:], 786 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 787 788 // Multiply Q in U by left bidiagonalizing vectors in VT. 789 impl.Dormbr(lapack.ApplyQ, blas.Right, blas.NoTrans, 790 m, n, n, vt, ldvt, work[itauq:], u, ldu, work[iwork:], lwork-iwork) 791 792 // Generate right bidiagonizing vectors in VT. 793 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, 794 work[itaup:], work[iwork:], lwork-iwork) 795 iwork = ie + n 796 797 // Perform bidiagonal QR iteration, computing left singular 798 // vectors of A in U and computing right singular vectors 799 // of A in VT. 800 impl.Dbdsqr(blas.Upper, n, n, m, 0, s, work[ie:], 801 vt, ldvt, u, ldu, work, 1, work[iwork:]) 802 } 803 } 804 } 805 } else { 806 // Path 10. 807 // M at least N, but not much larger. 808 ie = 0 809 itauq := ie + n 810 itaup := itauq + n 811 iwork := itaup + n 812 813 // Bidiagonalize A. 814 impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], 815 work[itaup:], work[iwork:], lwork-iwork) 816 if wantuas { 817 // Left singular vectors are desired in U. Copy result to U and 818 // generate left biadiagonalizing vectors in U. 819 impl.Dlacpy(blas.Lower, m, n, a, lda, u, ldu) 820 var ncu int 821 if wantus { 822 ncu = n 823 } 824 if wantua { 825 ncu = m 826 } 827 impl.Dorgbr(lapack.ApplyQ, m, ncu, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 828 } 829 if wantvas { 830 // Right singular vectors are desired in VT. Copy result to VT and 831 // generate left biadiagonalizing vectors in VT. 832 impl.Dlacpy(blas.Upper, n, n, a, lda, vt, ldvt) 833 impl.Dorgbr(lapack.ApplyP, n, n, n, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork) 834 } 835 if wantuo { 836 panic(noSVDO) 837 } 838 if wantvo { 839 panic(noSVDO) 840 } 841 iwork = ie + n 842 var nru, ncvt int 843 if wantuas || wantuo { 844 nru = m 845 } 846 if wantun { 847 nru = 0 848 } 849 if wantvas || wantvo { 850 ncvt = n 851 } 852 if wantvn { 853 ncvt = 0 854 } 855 if !wantuo && !wantvo { 856 // Perform bidiagonal QR iteration, if desired, computing left 857 // singular vectors in U and right singular vectors in VT. 858 ok = impl.Dbdsqr(blas.Upper, n, ncvt, nru, 0, s, work[ie:], 859 vt, ldvt, u, ldu, work, 1, work[iwork:]) 860 } else { 861 // There will be two branches when the implementation is complete. 862 panic(noSVDO) 863 } 864 } 865 } else { 866 // A has more columns than rows. If A has sufficiently more columns than 867 // rows, first reduce using the LQ decomposition. 868 if n >= mnthr { 869 // n >> m. 870 if wantvn { 871 // Path 1t. 872 itau := 0 873 iwork := itau + m 874 875 // Compute A = L*Q. 876 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 877 878 // Zero out above L. 879 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 880 ie := 0 881 itauq := ie + m 882 itaup := itauq + m 883 iwork = itaup + m 884 885 // Bidiagonalize L in A. 886 impl.Dgebrd(m, m, a, lda, s, work[ie:itauq], 887 work[itauq:itaup], work[itaup:iwork], work[iwork:], lwork-iwork) 888 if wantuo || wantuas { 889 impl.Dorgbr(lapack.ApplyQ, m, m, m, a, lda, 890 work[itauq:], work[iwork:], lwork-iwork) 891 } 892 iwork = ie + m 893 nru := 0 894 if wantuo || wantuas { 895 nru = m 896 } 897 898 // Perform bidiagonal QR iteration, computing left singular vectors 899 // of A in A if desired. 900 ok = impl.Dbdsqr(blas.Upper, m, 0, nru, 0, s, work[ie:], 901 work, 1, a, lda, work, 1, work[iwork:]) 902 903 // If left singular vectors desired in U, copy them there. 904 if wantuas { 905 impl.Dlacpy(blas.All, m, m, a, lda, u, ldu) 906 } 907 } else if wantvo && wantun { 908 // Path 2t. 909 panic(noSVDO) 910 } else if wantvo && wantuas { 911 // Path 3t. 912 panic(noSVDO) 913 } else if wantvs { 914 if wantun { 915 // Path 4t. 916 if lwork >= m*m+max(4*m, bdspac) { 917 // Sufficient workspace for a fast algorithm. 918 ir := 0 919 var ldworkr int 920 if lwork >= wrkbl+lda*m { 921 ldworkr = lda 922 } else { 923 ldworkr = m 924 } 925 itau := ir + ldworkr*m 926 iwork := itau + m 927 928 // Compute A = L*Q. 929 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 930 931 // Copy L to work[ir:], zeroing out above it. 932 impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr) 933 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr) 934 935 // Generate Q in A. 936 impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork) 937 ie := itau 938 itauq := ie + m 939 itaup := itauq + m 940 iwork = itaup + m 941 942 // Bidiagonalize L in work[ir:]. 943 impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:], 944 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 945 946 // Generate right vectors bidiagonalizing L in work[ir:]. 947 impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr, 948 work[itaup:], work[iwork:], lwork-iwork) 949 iwork = ie + m 950 951 // Perform bidiagonal QR iteration, computing right singular 952 // vectors of L in work[ir:]. 953 ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:], 954 work[ir:], ldworkr, work, 1, work, 1, work[iwork:]) 955 956 // Multiply right singular vectors of L in work[ir:] by 957 // Q in A, storing result in VT. 958 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 959 work[ir:], ldworkr, a, lda, 0, vt, ldvt) 960 } else { 961 // Insufficient workspace for a fast algorithm. 962 itau := 0 963 iwork := itau + m 964 965 // Compute A = L*Q. 966 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 967 968 // Copy result to VT. 969 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 970 971 // Generate Q in VT. 972 impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 973 ie := itau 974 itauq := ie + m 975 itaup := itauq + m 976 iwork = itaup + m 977 978 // Zero out above L in A. 979 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 980 981 // Bidiagonalize L in A. 982 impl.Dgebrd(m, m, a, lda, s, work[ie:], 983 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 984 985 // Multiply right vectors bidiagonalizing L by Q in VT. 986 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 987 a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 988 iwork = ie + m 989 990 // Perform bidiagonal QR iteration, computing right 991 // singular vectors of A in VT. 992 ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:], 993 vt, ldvt, work, 1, work, 1, work[iwork:]) 994 } 995 } else if wantuo { 996 // Path 5t. 997 panic(noSVDO) 998 } else if wantuas { 999 // Path 6t. 1000 if lwork >= m*m+max(4*m, bdspac) { 1001 // Sufficient workspace for a fast algorithm. 1002 iu := 0 1003 var ldworku int 1004 if lwork >= wrkbl+lda*m { 1005 ldworku = lda 1006 } else { 1007 ldworku = m 1008 } 1009 itau := iu + ldworku*m 1010 iwork := itau + m 1011 1012 // Compute A = L*Q. 1013 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1014 1015 // Copy L to work[iu:], zeroing out above it. 1016 impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku) 1017 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku) 1018 1019 // Generate Q in A. 1020 impl.Dorglq(m, n, m, a, lda, work[itau:], work[iwork:], lwork-iwork) 1021 ie := itau 1022 itauq := ie + m 1023 itaup := itauq + m 1024 iwork = itaup + m 1025 1026 // Bidiagonalize L in work[iu:], copying result to U. 1027 impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], 1028 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1029 impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu) 1030 1031 // Generate right bidiagionalizing vectors in work[iu:]. 1032 impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku, 1033 work[itaup:], work[iwork:], lwork-iwork) 1034 1035 // Generate left bidiagonalizing vectors in U. 1036 impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1037 iwork = ie + m 1038 1039 // Perform bidiagonal QR iteration, computing left singular 1040 // vectors of L in U and computing right singular vectors of 1041 // L in work[iu:]. 1042 ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:], 1043 work[iu:], ldworku, u, ldu, work, 1, work[iwork:]) 1044 1045 // Multiply right singular vectors of L in work[iu:] by 1046 // Q in A, storing result in VT. 1047 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1048 work[iu:], ldworku, a, lda, 0, vt, ldvt) 1049 } else { 1050 // Insufficient workspace for a fast algorithm. 1051 itau := 0 1052 iwork := itau + m 1053 1054 // Compute A = L*Q, copying result to VT. 1055 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1056 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1057 1058 // Generate Q in VT. 1059 impl.Dorglq(m, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1060 1061 // Copy L to U, zeroing out above it. 1062 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1063 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu) 1064 1065 ie := itau 1066 itauq := ie + m 1067 itaup := itauq + m 1068 iwork = itaup + m 1069 1070 // Bidiagonalize L in U. 1071 impl.Dgebrd(m, m, u, ldu, s, work[ie:], 1072 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1073 1074 // Multiply right bidiagonalizing vectors in U by Q in VT. 1075 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1076 u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1077 1078 // Generate left bidiagonalizing vectors in U. 1079 impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1080 iwork = ie + m 1081 1082 // Perform bidiagonal QR iteration, computing left singular 1083 // vectors of A in U and computing right singular vectors 1084 // of A in VT. 1085 impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], vt, ldvt, 1086 u, ldu, work, 1, work[iwork:]) 1087 } 1088 } 1089 } else if wantva { 1090 if wantun { 1091 // Path 7t. 1092 if lwork >= m*m+max(max(n+m, 4*m), bdspac) { 1093 // Sufficient workspace for a fast algorithm. 1094 ir := 0 1095 var ldworkr int 1096 if lwork >= wrkbl+lda*m { 1097 ldworkr = lda 1098 } else { 1099 ldworkr = m 1100 } 1101 itau := ir + ldworkr*m 1102 iwork := itau + m 1103 1104 // Compute A = L*Q, copying result to VT. 1105 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1106 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1107 1108 // Copy L to work[ir:], zeroing out above it. 1109 impl.Dlacpy(blas.Lower, m, m, a, lda, work[ir:], ldworkr) 1110 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[ir+1:], ldworkr) 1111 1112 // Generate Q in VT. 1113 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1114 1115 ie := itau 1116 itauq := ie + m 1117 itaup := itauq + m 1118 iwork = itaup + m 1119 1120 // Bidiagonalize L in work[ir:]. 1121 impl.Dgebrd(m, m, work[ir:], ldworkr, s, work[ie:], 1122 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1123 1124 // Generate right bidiagonalizing vectors in work[ir:]. 1125 impl.Dorgbr(lapack.ApplyP, m, m, m, work[ir:], ldworkr, 1126 work[itaup:], work[iwork:], lwork-iwork) 1127 iwork = ie + m 1128 1129 // Perform bidiagonal QR iteration, computing right 1130 // singular vectors of L in work[ir:]. 1131 ok = impl.Dbdsqr(blas.Upper, m, m, 0, 0, s, work[ie:], 1132 work[ir:], ldworkr, work, 1, work, 1, work[iwork:]) 1133 1134 // Multiply right singular vectors of L in work[ir:] by 1135 // Q in VT, storing result in A. 1136 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1137 work[ir:], ldworkr, vt, ldvt, 0, a, lda) 1138 1139 // Copy right singular vectors of A from A to VT. 1140 impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt) 1141 } else { 1142 // Insufficient workspace for a fast algorithm. 1143 itau := 0 1144 iwork := itau + m 1145 // Compute A = L * Q, copying result to VT. 1146 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1147 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1148 1149 // Generate Q in VT. 1150 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1151 1152 ie := itau 1153 itauq := ie + m 1154 itaup := itauq + m 1155 iwork = itaup + m 1156 1157 // Zero out above L in A. 1158 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, a[1:], lda) 1159 1160 // Bidiagonalize L in A. 1161 impl.Dgebrd(m, m, a, lda, s, work[ie:], work[itauq:], 1162 work[itaup:], work[iwork:], lwork-iwork) 1163 1164 // Multiply right bidiagonalizing vectors in A by Q in VT. 1165 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1166 a, lda, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1167 iwork = ie + m 1168 1169 // Perform bidiagonal QR iteration, computing right singular 1170 // vectors of A in VT. 1171 ok = impl.Dbdsqr(blas.Upper, m, n, 0, 0, s, work[ie:], 1172 vt, ldvt, work, 1, work, 1, work[iwork:]) 1173 } 1174 } else if wantuo { 1175 panic(noSVDO) 1176 } else if wantuas { 1177 // Path 9t. 1178 if lwork >= m*m+max(max(m+n, 4*m), bdspac) { 1179 // Sufficient workspace for a fast algorithm. 1180 iu := 0 1181 1182 var ldworku int 1183 if lwork >= wrkbl+lda*m { 1184 ldworku = lda 1185 } else { 1186 ldworku = m 1187 } 1188 itau := iu + ldworku*m 1189 iwork := itau + m 1190 1191 // Generate A = L * Q copying result to VT. 1192 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1193 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1194 1195 // Generate Q in VT. 1196 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1197 1198 // Copy L to work[iu:], zeroing out above it. 1199 impl.Dlacpy(blas.Lower, m, m, a, lda, work[iu:], ldworku) 1200 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, work[iu+1:], ldworku) 1201 ie = itau 1202 itauq := ie + m 1203 itaup := itauq + m 1204 iwork = itaup + m 1205 1206 // Bidiagonalize L in work[iu:], copying result to U. 1207 impl.Dgebrd(m, m, work[iu:], ldworku, s, work[ie:], 1208 work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1209 impl.Dlacpy(blas.Lower, m, m, work[iu:], ldworku, u, ldu) 1210 1211 // Generate right bidiagonalizing vectors in work[iu:]. 1212 impl.Dorgbr(lapack.ApplyP, m, m, m, work[iu:], ldworku, 1213 work[itaup:], work[iwork:], lwork-iwork) 1214 1215 // Generate left bidiagonalizing vectors in U. 1216 impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1217 iwork = ie + m 1218 1219 // Perform bidiagonal QR iteration, computing left singular 1220 // vectors of L in U and computing right singular vectors 1221 // of L in work[iu:]. 1222 ok = impl.Dbdsqr(blas.Upper, m, m, m, 0, s, work[ie:], 1223 work[iu:], ldworku, u, ldu, work, 1, work[iwork:]) 1224 1225 // Multiply right singular vectors of L in work[iu:] 1226 // Q in VT, storing result in A. 1227 bi.Dgemm(blas.NoTrans, blas.NoTrans, m, n, m, 1, 1228 work[iu:], ldworku, vt, ldvt, 0, a, lda) 1229 1230 // Copy right singular vectors of A from A to VT. 1231 impl.Dlacpy(blas.All, m, n, a, lda, vt, ldvt) 1232 } else { 1233 // Insufficient workspace for a fast algorithm. 1234 itau := 0 1235 iwork := itau + m 1236 1237 // Compute A = L * Q, copying result to VT. 1238 impl.Dgelqf(m, n, a, lda, work[itau:], work[iwork:], lwork-iwork) 1239 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1240 1241 // Generate Q in VT. 1242 impl.Dorglq(n, n, m, vt, ldvt, work[itau:], work[iwork:], lwork-iwork) 1243 1244 // Copy L to U, zeroing out above it. 1245 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1246 impl.Dlaset(blas.Upper, m-1, m-1, 0, 0, u[1:], ldu) 1247 1248 ie = itau 1249 itauq := ie + m 1250 itaup := itauq + m 1251 iwork = itaup + m 1252 1253 // Bidiagonalize L in U. 1254 impl.Dgebrd(m, m, u, ldu, s, work[ie:], work[itauq:], 1255 work[itaup:], work[iwork:], lwork-iwork) 1256 1257 // Multiply right bidiagonalizing vectors in U by Q in VT. 1258 impl.Dormbr(lapack.ApplyP, blas.Left, blas.Trans, m, n, m, 1259 u, ldu, work[itaup:], vt, ldvt, work[iwork:], lwork-iwork) 1260 1261 // Generate left bidiagonalizing vectors in U. 1262 impl.Dorgbr(lapack.ApplyQ, m, m, m, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1263 iwork = ie + m 1264 1265 // Perform bidiagonal QR iteration, computing left singular 1266 // vectors of A in U and computing right singular vectors 1267 // of A in VT. 1268 ok = impl.Dbdsqr(blas.Upper, m, n, m, 0, s, work[ie:], 1269 vt, ldvt, u, ldu, work, 1, work[iwork:]) 1270 } 1271 } 1272 } 1273 } else { 1274 // Path 10t. 1275 // N at least M, but not much larger. 1276 ie = 0 1277 itauq := ie + m 1278 itaup := itauq + m 1279 iwork := itaup + m 1280 1281 // Bidiagonalize A. 1282 impl.Dgebrd(m, n, a, lda, s, work[ie:], work[itauq:], work[itaup:], work[iwork:], lwork-iwork) 1283 if wantuas { 1284 // If left singular vectors desired in U, copy result to U and 1285 // generate left bidiagonalizing vectors in U. 1286 impl.Dlacpy(blas.Lower, m, m, a, lda, u, ldu) 1287 impl.Dorgbr(lapack.ApplyQ, m, m, n, u, ldu, work[itauq:], work[iwork:], lwork-iwork) 1288 } 1289 if wantvas { 1290 // If right singular vectors desired in VT, copy result to VT 1291 // and generate right bidiagonalizing vectors in VT. 1292 impl.Dlacpy(blas.Upper, m, n, a, lda, vt, ldvt) 1293 var nrvt int 1294 if wantva { 1295 nrvt = n 1296 } else { 1297 nrvt = m 1298 } 1299 impl.Dorgbr(lapack.ApplyP, nrvt, n, m, vt, ldvt, work[itaup:], work[iwork:], lwork-iwork) 1300 } 1301 if wantuo { 1302 panic(noSVDO) 1303 } 1304 if wantvo { 1305 panic(noSVDO) 1306 } 1307 iwork = ie + m 1308 var nru, ncvt int 1309 if wantuas || wantuo { 1310 nru = m 1311 } 1312 if wantvas || wantvo { 1313 ncvt = n 1314 } 1315 if !wantuo && !wantvo { 1316 // Perform bidiagonal QR iteration, if desired, computing left 1317 // singular vectors in U and computing right singular vectors in 1318 // VT. 1319 ok = impl.Dbdsqr(blas.Lower, m, ncvt, nru, 0, s, work[ie:], 1320 vt, ldvt, u, ldu, work, 1, work[iwork:]) 1321 } else { 1322 // There will be two branches when the implementation is complete. 1323 panic(noSVDO) 1324 } 1325 } 1326 } 1327 if !ok { 1328 if ie > 1 { 1329 for i := 0; i < minmn-1; i++ { 1330 work[i+1] = work[i+ie] 1331 } 1332 } 1333 if ie < 1 { 1334 for i := minmn - 2; i >= 0; i-- { 1335 work[i+1] = work[i+ie] 1336 } 1337 } 1338 } 1339 // Undo scaling if necessary. 1340 if iscl { 1341 if anrm > bignum { 1342 impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn, 1, s, minmn) 1343 } 1344 if !ok && anrm > bignum { 1345 impl.Dlascl(lapack.General, 0, 0, bignum, anrm, minmn-1, 1, work[minmn:], minmn) 1346 } 1347 if anrm < smlnum { 1348 impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn, 1, s, minmn) 1349 } 1350 if !ok && anrm < smlnum { 1351 impl.Dlascl(lapack.General, 0, 0, smlnum, anrm, minmn-1, 1, work[minmn:], minmn) 1352 } 1353 } 1354 work[0] = float64(maxwrk) 1355 return ok 1356 }