github.com/gopherd/gonum@v0.0.4/lapack/gonum/dlaqr23.go (about) 1 // Copyright ©2016 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math" 9 10 "github.com/gopherd/gonum/blas" 11 "github.com/gopherd/gonum/blas/blas64" 12 "github.com/gopherd/gonum/lapack" 13 ) 14 15 // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper 16 // Hessenberg matrix to detect and deflate fully converged eigenvalues from a 17 // trailing principal submatrix using aggressive early deflation [1]. 18 // 19 // On return, H will be overwritten by a new Hessenberg matrix that is a 20 // perturbation of an orthogonal similarity transformation of H. It is hoped 21 // that on output H will have many zero subdiagonal entries. 22 // 23 // If wantt is true, the matrix H will be fully updated so that the 24 // quasi-triangular Schur factor can be computed. If wantt is false, then only 25 // enough of H will be updated to preserve the eigenvalues. 26 // 27 // If wantz is true, the orthogonal similarity transformation will be 28 // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced. 29 // 30 // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal 31 // of H. It must hold that 32 // 0 <= ilo <= ihi < n if n > 0, 33 // ilo == 0 and ihi == -1 if n == 0, 34 // and the block must be isolated, that is, it must hold that 35 // ktop == 0 or H[ktop,ktop-1] == 0, 36 // kbot == n-1 or H[kbot+1,kbot] == 0, 37 // otherwise Dlaqr23 will panic. 38 // 39 // nw is the deflation window size. It must hold that 40 // 0 <= nw <= kbot-ktop+1, 41 // otherwise Dlaqr23 will panic. 42 // 43 // iloz and ihiz specify the rows of the n×n matrix Z to which transformations 44 // will be applied if wantz is true. It must hold that 45 // 0 <= iloz <= ktop, and kbot <= ihiz < n, 46 // otherwise Dlaqr23 will panic. 47 // 48 // sr and si must have length kbot+1, otherwise Dlaqr23 will panic. 49 // 50 // v and ldv represent an nw×nw work matrix. 51 // t and ldt represent an nw×nh work matrix, and nh must be at least nw. 52 // wv and ldwv represent an nv×nw work matrix. 53 // 54 // work must have length at least lwork and lwork must be at least max(1,2*nw), 55 // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater 56 // efficiency. On return, work[0] will contain the optimal value of lwork. 57 // 58 // If lwork is -1, instead of performing Dlaqr23, the function only estimates the 59 // optimal workspace size and stores it into work[0]. Neither h nor z are 60 // accessed. 61 // 62 // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves 63 // as DLAQR3, for recur == 0 it behaves as DLAQR2. 64 // 65 // On return, ns and nd will contain respectively the number of unconverged 66 // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in 67 // sr and si. 68 // 69 // On return, the real and imaginary parts of approximate eigenvalues that may 70 // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1] 71 // and si[kbot-nd-ns+1:kbot-nd+1]. 72 // 73 // On return, the real and imaginary parts of converged eigenvalues will be 74 // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1]. 75 // 76 // References: 77 // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: 78 // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973 79 // URL: http://dx.doi.org/10.1137/S0895479801384585 80 // 81 func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) { 82 switch { 83 case n < 0: 84 panic(nLT0) 85 case ktop < 0 || max(0, n-1) < ktop: 86 panic(badKtop) 87 case kbot < min(ktop, n-1) || n <= kbot: 88 panic(badKbot) 89 case nw < 0 || kbot-ktop+1+1 < nw: 90 panic(badNw) 91 case ldh < max(1, n): 92 panic(badLdH) 93 case wantz && (iloz < 0 || ktop < iloz): 94 panic(badIloz) 95 case wantz && (ihiz < kbot || n <= ihiz): 96 panic(badIhiz) 97 case ldz < 1, wantz && ldz < n: 98 panic(badLdZ) 99 case ldv < max(1, nw): 100 panic(badLdV) 101 case nh < nw: 102 panic(badNh) 103 case ldt < max(1, nh): 104 panic(badLdT) 105 case nv < 0: 106 panic(nvLT0) 107 case ldwv < max(1, nw): 108 panic(badLdWV) 109 case lwork < max(1, 2*nw) && lwork != -1: 110 panic(badLWork) 111 case len(work) < max(1, lwork): 112 panic(shortWork) 113 case recur < 0: 114 panic(recurLT0) 115 } 116 117 // Quick return for zero window size. 118 if nw == 0 { 119 work[0] = 1 120 return 0, 0 121 } 122 123 // LAPACK code does not enforce the documented behavior 124 // nw <= kbot-ktop+1 125 // but we do (we panic above). 126 jw := nw 127 lwkopt := max(1, 2*nw) 128 if jw > 2 { 129 // Workspace query call to Dgehrd. 130 impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1) 131 lwk1 := int(work[0]) 132 // Workspace query call to Dormhr. 133 impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1) 134 lwk2 := int(work[0]) 135 if recur > 0 { 136 // Workspace query call to Dlaqr04. 137 impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1) 138 lwk3 := int(work[0]) 139 // Optimal workspace. 140 lwkopt = max(jw+max(lwk1, lwk2), lwk3) 141 } else { 142 // Optimal workspace. 143 lwkopt = jw + max(lwk1, lwk2) 144 } 145 } 146 // Quick return in case of workspace query. 147 if lwork == -1 { 148 work[0] = float64(lwkopt) 149 return 0, 0 150 } 151 152 // Check input slices only if not doing workspace query. 153 switch { 154 case len(h) < (n-1)*ldh+n: 155 panic(shortH) 156 case len(v) < (nw-1)*ldv+nw: 157 panic(shortV) 158 case len(t) < (nw-1)*ldt+nh: 159 panic(shortT) 160 case len(wv) < (nv-1)*ldwv+nw: 161 panic(shortWV) 162 case wantz && len(z) < (n-1)*ldz+n: 163 panic(shortZ) 164 case len(sr) != kbot+1: 165 panic(badLenSr) 166 case len(si) != kbot+1: 167 panic(badLenSi) 168 case ktop > 0 && h[ktop*ldh+ktop-1] != 0: 169 panic(notIsolated) 170 case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0: 171 panic(notIsolated) 172 } 173 174 // Machine constants. 175 ulp := dlamchP 176 smlnum := float64(n) / ulp * dlamchS 177 178 // Setup deflation window. 179 var s float64 180 kwtop := kbot - jw + 1 181 if kwtop != ktop { 182 s = h[kwtop*ldh+kwtop-1] 183 } 184 if kwtop == kbot { 185 // 1×1 deflation window. 186 sr[kwtop] = h[kwtop*ldh+kwtop] 187 si[kwtop] = 0 188 ns = 1 189 nd = 0 190 if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) { 191 ns = 0 192 nd = 1 193 if kwtop > ktop { 194 h[kwtop*ldh+kwtop-1] = 0 195 } 196 } 197 work[0] = 1 198 return ns, nd 199 } 200 201 // Convert to spike-triangular form. In case of a rare QR failure, this 202 // routine continues to do aggressive early deflation using that part of 203 // the deflation window that converged using infqr here and there to 204 // keep track. 205 impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt) 206 bi := blas64.Implementation() 207 bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1) 208 impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv) 209 nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork) 210 var infqr int 211 if recur > 0 && jw > nmin { 212 infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1) 213 } else { 214 infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv) 215 } 216 // Note that ilo == 0 which conveniently coincides with the success 217 // value of infqr, that is, infqr as an index always points to the first 218 // converged eigenvalue. 219 220 // Dtrexc needs a clean margin near the diagonal. 221 for j := 0; j < jw-3; j++ { 222 t[(j+2)*ldt+j] = 0 223 t[(j+3)*ldt+j] = 0 224 } 225 if jw >= 3 { 226 t[(jw-1)*ldt+jw-3] = 0 227 } 228 229 ns = jw 230 ilst := infqr 231 // Deflation detection loop. 232 for ilst < ns { 233 bulge := false 234 if ns >= 2 { 235 bulge = t[(ns-1)*ldt+ns-2] != 0 236 } 237 if !bulge { 238 // Real eigenvalue. 239 abst := math.Abs(t[(ns-1)*ldt+ns-1]) 240 if abst == 0 { 241 abst = math.Abs(s) 242 } 243 if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) { 244 // Deflatable. 245 ns-- 246 } else { 247 // Undeflatable, move it up out of the way. 248 // Dtrexc can not fail in this case. 249 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) 250 ilst++ 251 } 252 continue 253 } 254 // Complex conjugate pair. 255 abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1])) 256 if abst == 0 { 257 abst = math.Abs(s) 258 } 259 if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) { 260 // Deflatable. 261 ns -= 2 262 } else { 263 // Undeflatable, move them up out of the way. 264 // Dtrexc does the right thing with ilst in case of a 265 // rare exchange failure. 266 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) 267 ilst += 2 268 } 269 } 270 271 // Return to Hessenberg form. 272 if ns == 0 { 273 s = 0 274 } 275 if ns < jw { 276 // Sorting diagonal blocks of T improves accuracy for graded 277 // matrices. Bubble sort deals well with exchange failures. 278 sorted := false 279 i := ns 280 for !sorted { 281 sorted = true 282 kend := i - 1 283 i = infqr 284 var k int 285 if i == ns-1 || t[(i+1)*ldt+i] == 0 { 286 k = i + 1 287 } else { 288 k = i + 2 289 } 290 for k <= kend { 291 var evi float64 292 if k == i+1 { 293 evi = math.Abs(t[i*ldt+i]) 294 } else { 295 evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1])) 296 } 297 298 var evk float64 299 if k == kend || t[(k+1)*ldt+k] == 0 { 300 evk = math.Abs(t[k*ldt+k]) 301 } else { 302 evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1])) 303 } 304 305 if evi >= evk { 306 i = k 307 } else { 308 sorted = false 309 _, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work) 310 if ok { 311 i = ilst 312 } else { 313 i = k 314 } 315 } 316 if i == kend || t[(i+1)*ldt+i] == 0 { 317 k = i + 1 318 } else { 319 k = i + 2 320 } 321 } 322 } 323 } 324 325 // Restore shift/eigenvalue array from T. 326 for i := jw - 1; i >= infqr; { 327 if i == infqr || t[i*ldt+i-1] == 0 { 328 sr[kwtop+i] = t[i*ldt+i] 329 si[kwtop+i] = 0 330 i-- 331 continue 332 } 333 aa := t[(i-1)*ldt+i-1] 334 bb := t[(i-1)*ldt+i] 335 cc := t[i*ldt+i-1] 336 dd := t[i*ldt+i] 337 _, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd) 338 i -= 2 339 } 340 341 if ns < jw || s == 0 { 342 if ns > 1 && s != 0 { 343 // Reflect spike back into lower triangle. 344 bi.Dcopy(ns, v[:ns], 1, work[:ns], 1) 345 _, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1) 346 work[0] = 1 347 impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt) 348 impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:]) 349 impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:]) 350 impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:]) 351 impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw) 352 } 353 354 // Copy updated reduced window into place. 355 if kwtop > 0 { 356 h[kwtop*ldh+kwtop-1] = s * v[0] 357 } 358 impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh) 359 bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1) 360 361 // Accumulate orthogonal matrix in order to update H and Z, if 362 // requested. 363 if ns > 1 && s != 0 { 364 // work[:ns-1] contains the elementary reflectors stored 365 // by a call to Dgehrd above. 366 impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1, 367 t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw) 368 } 369 370 // Update vertical slab in H. 371 var ltop int 372 if !wantt { 373 ltop = ktop 374 } 375 for krow := ltop; krow < kwtop; krow += nv { 376 kln := min(nv, kwtop-krow) 377 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, 378 1, h[krow*ldh+kwtop:], ldh, v, ldv, 379 0, wv, ldwv) 380 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh) 381 } 382 383 // Update horizontal slab in H. 384 if wantt { 385 for kcol := kbot + 1; kcol < n; kcol += nh { 386 kln := min(nh, n-kcol) 387 bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw, 388 1, v, ldv, h[kwtop*ldh+kcol:], ldh, 389 0, t, ldt) 390 impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh) 391 } 392 } 393 394 // Update vertical slab in Z. 395 if wantz { 396 for krow := iloz; krow <= ihiz; krow += nv { 397 kln := min(nv, ihiz-krow+1) 398 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, 399 1, z[krow*ldz+kwtop:], ldz, v, ldv, 400 0, wv, ldwv) 401 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz) 402 } 403 } 404 } 405 406 // The number of deflations. 407 nd = jw - ns 408 // Shifts are converged eigenvalues that could not be deflated. 409 // Subtracting infqr from the spike length takes care of the case of a 410 // rare QR failure while calculating eigenvalues of the deflation 411 // window. 412 ns -= infqr 413 work[0] = float64(lwkopt) 414 return ns, nd 415 }