gonum.org/v1/gonum@v0.14.0/lapack/gonum/dlaqr23.go (about) 1 // Copyright ©2016 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math" 9 10 "gonum.org/v1/gonum/blas" 11 "gonum.org/v1/gonum/blas/blas64" 12 "gonum.org/v1/gonum/lapack" 13 ) 14 15 // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper 16 // Hessenberg matrix to detect and deflate fully converged eigenvalues from a 17 // trailing principal submatrix using aggressive early deflation [1]. 18 // 19 // On return, H will be overwritten by a new Hessenberg matrix that is a 20 // perturbation of an orthogonal similarity transformation of H. It is hoped 21 // that on output H will have many zero subdiagonal entries. 22 // 23 // If wantt is true, the matrix H will be fully updated so that the 24 // quasi-triangular Schur factor can be computed. If wantt is false, then only 25 // enough of H will be updated to preserve the eigenvalues. 26 // 27 // If wantz is true, the orthogonal similarity transformation will be 28 // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced. 29 // 30 // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal 31 // of H. It must hold that 32 // 33 // 0 <= ilo <= ihi < n if n > 0, 34 // ilo == 0 and ihi == -1 if n == 0, 35 // 36 // and the block must be isolated, that is, it must hold that 37 // 38 // ktop == 0 or H[ktop,ktop-1] == 0, 39 // kbot == n-1 or H[kbot+1,kbot] == 0, 40 // 41 // otherwise Dlaqr23 will panic. 42 // 43 // nw is the deflation window size. It must hold that 44 // 45 // 0 <= nw <= kbot-ktop+1, 46 // 47 // otherwise Dlaqr23 will panic. 48 // 49 // iloz and ihiz specify the rows of the n×n matrix Z to which transformations 50 // will be applied if wantz is true. It must hold that 51 // 52 // 0 <= iloz <= ktop, and kbot <= ihiz < n, 53 // 54 // otherwise Dlaqr23 will panic. 55 // 56 // sr and si must have length kbot+1, otherwise Dlaqr23 will panic. 57 // 58 // v and ldv represent an nw×nw work matrix. 59 // t and ldt represent an nw×nh work matrix, and nh must be at least nw. 60 // wv and ldwv represent an nv×nw work matrix. 61 // 62 // work must have length at least lwork and lwork must be at least max(1,2*nw), 63 // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater 64 // efficiency. On return, work[0] will contain the optimal value of lwork. 65 // 66 // If lwork is -1, instead of performing Dlaqr23, the function only estimates the 67 // optimal workspace size and stores it into work[0]. Neither h nor z are 68 // accessed. 69 // 70 // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves 71 // as DLAQR3, for recur == 0 it behaves as DLAQR2. 72 // 73 // On return, ns and nd will contain respectively the number of unconverged 74 // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in 75 // sr and si. 76 // 77 // On return, the real and imaginary parts of approximate eigenvalues that may 78 // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1] 79 // and si[kbot-nd-ns+1:kbot-nd+1]. 80 // 81 // On return, the real and imaginary parts of converged eigenvalues will be 82 // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1]. 83 // 84 // References: 85 // 86 // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: 87 // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973 88 // URL: http://dx.doi.org/10.1137/S0895479801384585 89 func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) { 90 switch { 91 case n < 0: 92 panic(nLT0) 93 case ktop < 0 || max(0, n-1) < ktop: 94 panic(badKtop) 95 case kbot < min(ktop, n-1) || n <= kbot: 96 panic(badKbot) 97 case nw < 0 || kbot-ktop+1+1 < nw: 98 panic(badNw) 99 case ldh < max(1, n): 100 panic(badLdH) 101 case wantz && (iloz < 0 || ktop < iloz): 102 panic(badIloz) 103 case wantz && (ihiz < kbot || n <= ihiz): 104 panic(badIhiz) 105 case ldz < 1, wantz && ldz < n: 106 panic(badLdZ) 107 case ldv < max(1, nw): 108 panic(badLdV) 109 case nh < nw: 110 panic(badNh) 111 case ldt < max(1, nh): 112 panic(badLdT) 113 case nv < 0: 114 panic(nvLT0) 115 case ldwv < max(1, nw): 116 panic(badLdWV) 117 case lwork < max(1, 2*nw) && lwork != -1: 118 panic(badLWork) 119 case len(work) < max(1, lwork): 120 panic(shortWork) 121 case recur < 0: 122 panic(recurLT0) 123 } 124 125 // Quick return for zero window size. 126 if nw == 0 { 127 work[0] = 1 128 return 0, 0 129 } 130 131 // LAPACK code does not enforce the documented behavior 132 // nw <= kbot-ktop+1 133 // but we do (we panic above). 134 jw := nw 135 lwkopt := max(1, 2*nw) 136 if jw > 2 { 137 // Workspace query call to Dgehrd. 138 impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1) 139 lwk1 := int(work[0]) 140 // Workspace query call to Dormhr. 141 impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1) 142 lwk2 := int(work[0]) 143 if recur > 0 { 144 // Workspace query call to Dlaqr04. 145 impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1) 146 lwk3 := int(work[0]) 147 // Optimal workspace. 148 lwkopt = max(jw+max(lwk1, lwk2), lwk3) 149 } else { 150 // Optimal workspace. 151 lwkopt = jw + max(lwk1, lwk2) 152 } 153 } 154 // Quick return in case of workspace query. 155 if lwork == -1 { 156 work[0] = float64(lwkopt) 157 return 0, 0 158 } 159 160 // Check input slices only if not doing workspace query. 161 switch { 162 case len(h) < (n-1)*ldh+n: 163 panic(shortH) 164 case len(v) < (nw-1)*ldv+nw: 165 panic(shortV) 166 case len(t) < (nw-1)*ldt+nh: 167 panic(shortT) 168 case len(wv) < (nv-1)*ldwv+nw: 169 panic(shortWV) 170 case wantz && len(z) < (n-1)*ldz+n: 171 panic(shortZ) 172 case len(sr) != kbot+1: 173 panic(badLenSr) 174 case len(si) != kbot+1: 175 panic(badLenSi) 176 case ktop > 0 && h[ktop*ldh+ktop-1] != 0: 177 panic(notIsolated) 178 case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0: 179 panic(notIsolated) 180 } 181 182 // Machine constants. 183 ulp := dlamchP 184 smlnum := float64(n) / ulp * dlamchS 185 186 // Setup deflation window. 187 var s float64 188 kwtop := kbot - jw + 1 189 if kwtop != ktop { 190 s = h[kwtop*ldh+kwtop-1] 191 } 192 if kwtop == kbot { 193 // 1×1 deflation window. 194 sr[kwtop] = h[kwtop*ldh+kwtop] 195 si[kwtop] = 0 196 ns = 1 197 nd = 0 198 if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) { 199 ns = 0 200 nd = 1 201 if kwtop > ktop { 202 h[kwtop*ldh+kwtop-1] = 0 203 } 204 } 205 work[0] = 1 206 return ns, nd 207 } 208 209 // Convert to spike-triangular form. In case of a rare QR failure, this 210 // routine continues to do aggressive early deflation using that part of 211 // the deflation window that converged using infqr here and there to 212 // keep track. 213 impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt) 214 bi := blas64.Implementation() 215 bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1) 216 impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv) 217 nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork) 218 var infqr int 219 if recur > 0 && jw > nmin { 220 infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1) 221 } else { 222 infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv) 223 } 224 // Note that ilo == 0 which conveniently coincides with the success 225 // value of infqr, that is, infqr as an index always points to the first 226 // converged eigenvalue. 227 228 // Dtrexc needs a clean margin near the diagonal. 229 for j := 0; j < jw-3; j++ { 230 t[(j+2)*ldt+j] = 0 231 t[(j+3)*ldt+j] = 0 232 } 233 if jw >= 3 { 234 t[(jw-1)*ldt+jw-3] = 0 235 } 236 237 ns = jw 238 ilst := infqr 239 // Deflation detection loop. 240 for ilst < ns { 241 bulge := false 242 if ns >= 2 { 243 bulge = t[(ns-1)*ldt+ns-2] != 0 244 } 245 if !bulge { 246 // Real eigenvalue. 247 abst := math.Abs(t[(ns-1)*ldt+ns-1]) 248 if abst == 0 { 249 abst = math.Abs(s) 250 } 251 if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) { 252 // Deflatable. 253 ns-- 254 } else { 255 // Undeflatable, move it up out of the way. 256 // Dtrexc can not fail in this case. 257 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) 258 ilst++ 259 } 260 continue 261 } 262 // Complex conjugate pair. 263 abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1])) 264 if abst == 0 { 265 abst = math.Abs(s) 266 } 267 if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) { 268 // Deflatable. 269 ns -= 2 270 } else { 271 // Undeflatable, move them up out of the way. 272 // Dtrexc does the right thing with ilst in case of a 273 // rare exchange failure. 274 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) 275 ilst += 2 276 } 277 } 278 279 // Return to Hessenberg form. 280 if ns == 0 { 281 s = 0 282 } 283 if ns < jw { 284 // Sorting diagonal blocks of T improves accuracy for graded 285 // matrices. Bubble sort deals well with exchange failures. 286 sorted := false 287 i := ns 288 for !sorted { 289 sorted = true 290 kend := i - 1 291 i = infqr 292 var k int 293 if i == ns-1 || t[(i+1)*ldt+i] == 0 { 294 k = i + 1 295 } else { 296 k = i + 2 297 } 298 for k <= kend { 299 var evi float64 300 if k == i+1 { 301 evi = math.Abs(t[i*ldt+i]) 302 } else { 303 evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1])) 304 } 305 306 var evk float64 307 if k == kend || t[(k+1)*ldt+k] == 0 { 308 evk = math.Abs(t[k*ldt+k]) 309 } else { 310 evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1])) 311 } 312 313 if evi >= evk { 314 i = k 315 } else { 316 sorted = false 317 _, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work) 318 if ok { 319 i = ilst 320 } else { 321 i = k 322 } 323 } 324 if i == kend || t[(i+1)*ldt+i] == 0 { 325 k = i + 1 326 } else { 327 k = i + 2 328 } 329 } 330 } 331 } 332 333 // Restore shift/eigenvalue array from T. 334 for i := jw - 1; i >= infqr; { 335 if i == infqr || t[i*ldt+i-1] == 0 { 336 sr[kwtop+i] = t[i*ldt+i] 337 si[kwtop+i] = 0 338 i-- 339 continue 340 } 341 aa := t[(i-1)*ldt+i-1] 342 bb := t[(i-1)*ldt+i] 343 cc := t[i*ldt+i-1] 344 dd := t[i*ldt+i] 345 _, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd) 346 i -= 2 347 } 348 349 if ns < jw || s == 0 { 350 if ns > 1 && s != 0 { 351 // Reflect spike back into lower triangle. 352 bi.Dcopy(ns, v[:ns], 1, work[:ns], 1) 353 _, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1) 354 work[0] = 1 355 impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt) 356 impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:]) 357 impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:]) 358 impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:]) 359 impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw) 360 } 361 362 // Copy updated reduced window into place. 363 if kwtop > 0 { 364 h[kwtop*ldh+kwtop-1] = s * v[0] 365 } 366 impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh) 367 bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1) 368 369 // Accumulate orthogonal matrix in order to update H and Z, if 370 // requested. 371 if ns > 1 && s != 0 { 372 // work[:ns-1] contains the elementary reflectors stored 373 // by a call to Dgehrd above. 374 impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1, 375 t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw) 376 } 377 378 // Update vertical slab in H. 379 var ltop int 380 if !wantt { 381 ltop = ktop 382 } 383 for krow := ltop; krow < kwtop; krow += nv { 384 kln := min(nv, kwtop-krow) 385 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, 386 1, h[krow*ldh+kwtop:], ldh, v, ldv, 387 0, wv, ldwv) 388 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh) 389 } 390 391 // Update horizontal slab in H. 392 if wantt { 393 for kcol := kbot + 1; kcol < n; kcol += nh { 394 kln := min(nh, n-kcol) 395 bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw, 396 1, v, ldv, h[kwtop*ldh+kcol:], ldh, 397 0, t, ldt) 398 impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh) 399 } 400 } 401 402 // Update vertical slab in Z. 403 if wantz { 404 for krow := iloz; krow <= ihiz; krow += nv { 405 kln := min(nv, ihiz-krow+1) 406 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, 407 1, z[krow*ldz+kwtop:], ldz, v, ldv, 408 0, wv, ldwv) 409 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz) 410 } 411 } 412 } 413 414 // The number of deflations. 415 nd = jw - ns 416 // Shifts are converged eigenvalues that could not be deflated. 417 // Subtracting infqr from the spike length takes care of the case of a 418 // rare QR failure while calculating eigenvalues of the deflation 419 // window. 420 ns -= infqr 421 work[0] = float64(lwkopt) 422 return ns, nd 423 }