github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dlaqr04.go (about) 1 // Copyright ©2016 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math" 9 10 "github.com/jingcheng-WU/gonum/blas" 11 ) 12 13 // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix 14 // H, and optionally the matrices T and Z from the Schur decomposition 15 // H = Z T Zᵀ 16 // where T is an upper quasi-triangular matrix (the Schur form), and Z is the 17 // orthogonal matrix of Schur vectors. 18 // 19 // wantt indicates whether the full Schur form T is required. If wantt is false, 20 // then only enough of H will be updated to preserve the eigenvalues. 21 // 22 // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it 23 // is true, the orthogonal similarity transformation will be accumulated into 24 // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced. 25 // 26 // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that 27 // 0 <= ilo <= ihi < n if n > 0, 28 // ilo == 0 and ihi == -1 if n == 0, 29 // and the block must be isolated, that is, 30 // ilo == 0 or H[ilo,ilo-1] == 0, 31 // ihi == n-1 or H[ihi+1,ihi] == 0, 32 // otherwise Dlaqr04 will panic. 33 // 34 // wr and wi must have length ihi+1. 35 // 36 // iloz and ihiz specify the rows of Z to which transformations will be applied 37 // if wantz is true. It must hold that 38 // 0 <= iloz <= ilo, and ihi <= ihiz < n, 39 // otherwise Dlaqr04 will panic. 40 // 41 // work must have length at least lwork and lwork must be 42 // lwork >= 1 if n <= 11, 43 // lwork >= n if n > 11, 44 // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for 45 // optimal performance. On return, work[0] will contain the optimal value of 46 // lwork. 47 // 48 // If lwork is -1, instead of performing Dlaqr04, the function only estimates the 49 // optimal workspace size and stores it into work[0]. Neither h nor z are 50 // accessed. 51 // 52 // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves 53 // as DLAQR0, for recur == 0 it behaves as DLAQR4. 54 // 55 // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1]. 56 // 57 // If unconverged is zero and wantt is true, H will contain on return the upper 58 // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks 59 // (corresponding to complex conjugate pairs of eigenvalues) will be returned in 60 // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0. 61 // 62 // If unconverged is zero and if wantt is false, the contents of h on return is 63 // unspecified. 64 // 65 // If unconverged is zero, all the eigenvalues have been computed and their real 66 // and imaginary parts will be stored on return in wr[ilo:ihi+1] and 67 // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex 68 // conjugate pair, they are stored in consecutive elements of wr and wi, say the 69 // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the 70 // eigenvalues are stored in the same order as on the diagonal of the Schur form 71 // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal 72 // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i]. 73 // 74 // If unconverged is positive, some eigenvalues have not converged, and 75 // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those 76 // eigenvalues which have been successfully computed. Failures are rare. 77 // 78 // If unconverged is positive and wantt is true, then on return 79 // (initial H)*U = U*(final H), (*) 80 // where U is an orthogonal matrix. The final H is upper Hessenberg and 81 // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular. 82 // 83 // If unconverged is positive and wantt is false, on return the remaining 84 // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix 85 // H[ilo:unconverged,ilo:unconverged]. 86 // 87 // If unconverged is positive and wantz is true, then on return 88 // (final Z) = (initial Z)*U, 89 // where U is the orthogonal matrix in (*) regardless of the value of wantt. 90 // 91 // References: 92 // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I: 93 // Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix 94 // Anal. Appl. 23(4) (2002), pp. 929—947 95 // URL: http://dx.doi.org/10.1137/S0895479801384573 96 // [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: 97 // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973 98 // URL: http://dx.doi.org/10.1137/S0895479801384585 99 // 100 // Dlaqr04 is an internal routine. It is exported for testing purposes. 101 func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) { 102 const ( 103 // Matrices of order ntiny or smaller must be processed by 104 // Dlahqr because of insufficient subdiagonal scratch space. 105 // This is a hard limit. 106 ntiny = 11 107 // Exceptional deflation windows: try to cure rare slow 108 // convergence by varying the size of the deflation window after 109 // kexnw iterations. 110 kexnw = 5 111 // Exceptional shifts: try to cure rare slow convergence with 112 // ad-hoc exceptional shifts every kexsh iterations. 113 kexsh = 6 114 115 // See https://github.com/gonum/lapack/pull/151#discussion_r68162802 116 // and the surrounding discussion for an explanation where these 117 // constants come from. 118 // TODO(vladimir-ch): Similar constants for exceptional shifts 119 // are used also in dlahqr.go. The first constant is different 120 // there, it is equal to 3. Why? And does it matter? 121 wilk1 = 0.75 122 wilk2 = -0.4375 123 ) 124 125 switch { 126 case n < 0: 127 panic(nLT0) 128 case ilo < 0 || max(0, n-1) < ilo: 129 panic(badIlo) 130 case ihi < min(ilo, n-1) || n <= ihi: 131 panic(badIhi) 132 case ldh < max(1, n): 133 panic(badLdH) 134 case wantz && (iloz < 0 || ilo < iloz): 135 panic(badIloz) 136 case wantz && (ihiz < ihi || n <= ihiz): 137 panic(badIhiz) 138 case ldz < 1, wantz && ldz < n: 139 panic(badLdZ) 140 case lwork < 1 && lwork != -1: 141 panic(badLWork) 142 // TODO(vladimir-ch): Enable if and when we figure out what the minimum 143 // necessary lwork value is. Dlaqr04 says that the minimum is n which 144 // clashes with Dlaqr23's opinion about optimal work when nw <= 2 145 // (independent of n). 146 // case lwork < n && n > ntiny && lwork != -1: 147 // panic(badLWork) 148 case len(work) < max(1, lwork): 149 panic(shortWork) 150 case recur < 0: 151 panic(recurLT0) 152 } 153 154 // Quick return. 155 if n == 0 { 156 work[0] = 1 157 return 0 158 } 159 160 if lwork != -1 { 161 switch { 162 case len(h) < (n-1)*ldh+n: 163 panic(shortH) 164 case len(wr) != ihi+1: 165 panic(badLenWr) 166 case len(wi) != ihi+1: 167 panic(badLenWi) 168 case wantz && len(z) < (n-1)*ldz+n: 169 panic(shortZ) 170 case ilo > 0 && h[ilo*ldh+ilo-1] != 0: 171 panic(notIsolated) 172 case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0: 173 panic(notIsolated) 174 } 175 } 176 177 if n <= ntiny { 178 // Tiny matrices must use Dlahqr. 179 if lwork == -1 { 180 work[0] = 1 181 return 0 182 } 183 return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz) 184 } 185 186 // Use small bulge multi-shift QR with aggressive early deflation on 187 // larger-than-tiny matrices. 188 var jbcmpz string 189 if wantt { 190 jbcmpz = "S" 191 } else { 192 jbcmpz = "E" 193 } 194 if wantz { 195 jbcmpz += "V" 196 } else { 197 jbcmpz += "N" 198 } 199 200 var fname string 201 if recur > 0 { 202 fname = "DLAQR0" 203 } else { 204 fname = "DLAQR4" 205 } 206 // nwr is the recommended deflation window size. n is greater than 11, 207 // so there is enough subdiagonal workspace for nwr >= 2 as required. 208 // (In fact, there is enough subdiagonal space for nwr >= 3.) 209 // TODO(vladimir-ch): If there is enough space for nwr >= 3, should we 210 // use it? 211 nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork) 212 nwr = max(2, nwr) 213 nwr = min(ihi-ilo+1, min((n-1)/3, nwr)) 214 215 // nsr is the recommended number of simultaneous shifts. n is greater 216 // than 11, so there is enough subdiagonal workspace for nsr to be even 217 // and greater than or equal to two as required. 218 nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork) 219 nsr = min(nsr, min((n+6)/9, ihi-ilo)) 220 nsr = max(2, nsr&^1) 221 222 // Workspace query call to Dlaqr23. 223 impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz, ihiz, z, ldz, 224 wr, wi, h, ldh, n, h, ldh, n, h, ldh, work, -1, recur) 225 // Optimal workspace is max(Dlaqr5, Dlaqr23). 226 lwkopt := max(3*nsr/2, int(work[0])) 227 // Quick return in case of workspace query. 228 if lwork == -1 { 229 work[0] = float64(lwkopt) 230 return 0 231 } 232 233 // Dlahqr/Dlaqr04 crossover point. 234 nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork) 235 nmin = max(ntiny, nmin) 236 237 // Nibble determines when to skip a multi-shift QR sweep (Dlaqr5). 238 nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork) 239 nibble = max(0, nibble) 240 241 // Computation mode of far-from-diagonal orthogonal updates in Dlaqr5. 242 kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork) 243 kacc22 = max(0, min(kacc22, 2)) 244 245 // nwmax is the largest possible deflation window for which there is 246 // sufficient workspace. 247 nwmax := min((n-1)/3, lwork/2) 248 nw := nwmax // Start with maximum deflation window size. 249 250 // nsmax is the largest number of simultaneous shifts for which there is 251 // sufficient workspace. 252 nsmax := min((n+6)/9, 2*lwork/3) &^ 1 253 254 ndfl := 1 // Number of iterations since last deflation. 255 ndec := 0 // Deflation window size decrement. 256 257 // Main loop. 258 var ( 259 itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1)) 260 it = 0 261 ) 262 for kbot := ihi; kbot >= ilo; { 263 if it == itmax { 264 unconverged = kbot + 1 265 break 266 } 267 it++ 268 269 // Locate active block. 270 ktop := ilo 271 for k := kbot; k >= ilo+1; k-- { 272 if h[k*ldh+k-1] == 0 { 273 ktop = k 274 break 275 } 276 } 277 278 // Select deflation window size nw. 279 // 280 // Typical Case: 281 // If possible and advisable, nibble the entire active block. 282 // If not, use size min(nwr,nwmax) or min(nwr+1,nwmax) 283 // depending upon which has the smaller corresponding 284 // subdiagonal entry (a heuristic). 285 // 286 // Exceptional Case: 287 // If there have been no deflations in kexnw or more 288 // iterations, then vary the deflation window size. At first, 289 // because larger windows are, in general, more powerful than 290 // smaller ones, rapidly increase the window to the maximum 291 // possible. Then, gradually reduce the window size. 292 nh := kbot - ktop + 1 293 nwupbd := min(nh, nwmax) 294 if ndfl < kexnw { 295 nw = min(nwupbd, nwr) 296 } else { 297 nw = min(nwupbd, 2*nw) 298 } 299 if nw < nwmax { 300 if nw >= nh-1 { 301 nw = nh 302 } else { 303 kwtop := kbot - nw + 1 304 if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) { 305 nw++ 306 } 307 } 308 } 309 if ndfl < kexnw { 310 ndec = -1 311 } else if ndec >= 0 || nw >= nwupbd { 312 ndec++ 313 if nw-ndec < 2 { 314 ndec = 0 315 } 316 nw -= ndec 317 } 318 319 // Split workspace under the subdiagonal of H into: 320 // - an nw×nw work array V in the lower left-hand corner, 321 // - an nw×nhv horizontal work array along the bottom edge (nhv 322 // must be at least nw but more is better), 323 // - an nve×nw vertical work array along the left-hand-edge 324 // (nhv can be any positive integer but more is better). 325 kv := n - nw 326 kt := nw 327 kwv := nw + 1 328 nhv := n - kwv - kt 329 // Aggressive early deflation. 330 ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, 331 h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1], 332 h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur) 333 334 // Adjust kbot accounting for new deflations. 335 kbot -= ld 336 // ks points to the shifts. 337 ks := kbot - ls + 1 338 339 // Skip an expensive QR sweep if there is a (partly heuristic) 340 // reason to expect that many eigenvalues will deflate without 341 // it. Here, the QR sweep is skipped if many eigenvalues have 342 // just been deflated or if the remaining active block is small. 343 if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) { 344 // ld is positive, note progress. 345 ndfl = 1 346 continue 347 } 348 349 // ns is the nominal number of simultaneous shifts. This may be 350 // lowered (slightly) if Dlaqr23 did not provide that many 351 // shifts. 352 ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1 353 354 // If there have been no deflations in a multiple of kexsh 355 // iterations, then try exceptional shifts. Otherwise use shifts 356 // provided by Dlaqr23 above or from the eigenvalues of a 357 // trailing principal submatrix. 358 if ndfl%kexsh == 0 { 359 ks = kbot - ns + 1 360 for i := kbot; i > max(ks, ktop+1); i -= 2 { 361 ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2]) 362 aa := wilk1*ss + h[i*ldh+i] 363 _, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ = 364 impl.Dlanv2(aa, ss, wilk2*ss, aa) 365 } 366 if ks == ktop { 367 wr[ks+1] = h[(ks+1)*ldh+ks+1] 368 wi[ks+1] = 0 369 wr[ks] = wr[ks+1] 370 wi[ks] = wi[ks+1] 371 } 372 } else { 373 // If we got ns/2 or fewer shifts, use Dlahqr or recur 374 // into Dlaqr04 on a trailing principal submatrix to get 375 // more. Since ns <= nsmax <=(n+6)/9, there is enough 376 // space below the subdiagonal to fit an ns×ns scratch 377 // array. 378 if kbot-ks+1 <= ns/2 { 379 ks = kbot - ns + 1 380 kt = n - ns 381 impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh) 382 if ns > nmin && recur > 0 { 383 ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh, 384 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1) 385 } else { 386 ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh, 387 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 1) 388 } 389 // In case of a rare QR failure use eigenvalues 390 // of the trailing 2×2 principal submatrix. 391 if ks >= kbot { 392 aa := h[(kbot-1)*ldh+kbot-1] 393 bb := h[(kbot-1)*ldh+kbot] 394 cc := h[kbot*ldh+kbot-1] 395 dd := h[kbot*ldh+kbot] 396 _, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ = 397 impl.Dlanv2(aa, bb, cc, dd) 398 ks = kbot - 1 399 } 400 } 401 402 if kbot-ks+1 > ns { 403 // Sorting the shifts helps a little. Bubble 404 // sort keeps complex conjugate pairs together. 405 sorted := false 406 for k := kbot; k > ks; k-- { 407 if sorted { 408 break 409 } 410 sorted = true 411 for i := ks; i < k; i++ { 412 if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) { 413 continue 414 } 415 sorted = false 416 wr[i], wr[i+1] = wr[i+1], wr[i] 417 wi[i], wi[i+1] = wi[i+1], wi[i] 418 } 419 } 420 } 421 422 // Shuffle shifts into pairs of real shifts and pairs of 423 // complex conjugate shifts using the fact that complex 424 // conjugate shifts are already adjacent to one another. 425 // TODO(vladimir-ch): The shuffling here could probably 426 // be removed but I'm not sure right now and it's safer 427 // to leave it. 428 for i := kbot; i > ks+1; i -= 2 { 429 if wi[i] == -wi[i-1] { 430 continue 431 } 432 wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i] 433 wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i] 434 } 435 } 436 437 // If there are only two shifts and both are real, then use only one. 438 if kbot-ks+1 == 2 && wi[kbot] == 0 { 439 if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) { 440 wr[kbot-1] = wr[kbot] 441 } else { 442 wr[kbot] = wr[kbot-1] 443 } 444 } 445 446 // Use up to ns of the smallest magnitude shifts. If there 447 // aren't ns shifts available, then use them all, possibly 448 // dropping one to make the number of shifts even. 449 ns = min(ns, kbot-ks+1) &^ 1 450 ks = kbot - ns + 1 451 452 // Split workspace under the subdiagonal into: 453 // - a kdu×kdu work array U in the lower left-hand-corner, 454 // - a kdu×nhv horizontal work array WH along the bottom edge 455 // (nhv must be at least kdu but more is better), 456 // - an nhv×kdu vertical work array WV along the left-hand-edge 457 // (nhv must be at least kdu but more is better). 458 kdu := 3*ns - 3 459 ku := n - kdu 460 kwh := kdu 461 kwv = kdu + 3 462 nhv = n - kwv - kdu 463 // Small-bulge multi-shift QR sweep. 464 impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns, 465 wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz, 466 work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh) 467 468 // Note progress (or the lack of it). 469 if ld > 0 { 470 ndfl = 1 471 } else { 472 ndfl++ 473 } 474 } 475 476 work[0] = float64(lwkopt) 477 return unconverged 478 }