gonum.org/v1/gonum@v0.14.0/lapack/gonum/dlaqr04.go (about) 1 // Copyright ©2016 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math" 9 10 "gonum.org/v1/gonum/blas" 11 ) 12 13 // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix 14 // H, and optionally the matrices T and Z from the Schur decomposition 15 // 16 // H = Z T Zᵀ 17 // 18 // where T is an upper quasi-triangular matrix (the Schur form), and Z is the 19 // orthogonal matrix of Schur vectors. 20 // 21 // wantt indicates whether the full Schur form T is required. If wantt is false, 22 // then only enough of H will be updated to preserve the eigenvalues. 23 // 24 // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it 25 // is true, the orthogonal similarity transformation will be accumulated into 26 // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced. 27 // 28 // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that 29 // 30 // 0 <= ilo <= ihi < n if n > 0, 31 // ilo == 0 and ihi == -1 if n == 0, 32 // 33 // and the block must be isolated, that is, 34 // 35 // ilo == 0 or H[ilo,ilo-1] == 0, 36 // ihi == n-1 or H[ihi+1,ihi] == 0, 37 // 38 // otherwise Dlaqr04 will panic. 39 // 40 // wr and wi must have length ihi+1. 41 // 42 // iloz and ihiz specify the rows of Z to which transformations will be applied 43 // if wantz is true. It must hold that 44 // 45 // 0 <= iloz <= ilo, and ihi <= ihiz < n, 46 // 47 // otherwise Dlaqr04 will panic. 48 // 49 // work must have length at least lwork and lwork must be 50 // 51 // lwork >= 1 if n <= 11, 52 // lwork >= n if n > 11, 53 // 54 // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for 55 // optimal performance. On return, work[0] will contain the optimal value of 56 // lwork. 57 // 58 // If lwork is -1, instead of performing Dlaqr04, the function only estimates the 59 // optimal workspace size and stores it into work[0]. Neither h nor z are 60 // accessed. 61 // 62 // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves 63 // as DLAQR0, for recur == 0 it behaves as DLAQR4. 64 // 65 // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1]. 66 // 67 // If unconverged is zero and wantt is true, H will contain on return the upper 68 // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks 69 // (corresponding to complex conjugate pairs of eigenvalues) will be returned in 70 // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0. 71 // 72 // If unconverged is zero and if wantt is false, the contents of h on return is 73 // unspecified. 74 // 75 // If unconverged is zero, all the eigenvalues have been computed and their real 76 // and imaginary parts will be stored on return in wr[ilo:ihi+1] and 77 // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex 78 // conjugate pair, they are stored in consecutive elements of wr and wi, say the 79 // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the 80 // eigenvalues are stored in the same order as on the diagonal of the Schur form 81 // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal 82 // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i]. 83 // 84 // If unconverged is positive, some eigenvalues have not converged, and 85 // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those 86 // eigenvalues which have been successfully computed. Failures are rare. 87 // 88 // If unconverged is positive and wantt is true, then on return 89 // 90 // (initial H)*U = U*(final H), (*) 91 // 92 // where U is an orthogonal matrix. The final H is upper Hessenberg and 93 // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular. 94 // 95 // If unconverged is positive and wantt is false, on return the remaining 96 // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix 97 // H[ilo:unconverged,ilo:unconverged]. 98 // 99 // If unconverged is positive and wantz is true, then on return 100 // 101 // (final Z) = (initial Z)*U, 102 // 103 // where U is the orthogonal matrix in (*) regardless of the value of wantt. 104 // 105 // References: 106 // 107 // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I: 108 // Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix 109 // Anal. Appl. 23(4) (2002), pp. 929—947 110 // URL: http://dx.doi.org/10.1137/S0895479801384573 111 // [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: 112 // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973 113 // URL: http://dx.doi.org/10.1137/S0895479801384585 114 // 115 // Dlaqr04 is an internal routine. It is exported for testing purposes. 116 func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) { 117 const ( 118 // Matrices of order ntiny or smaller must be processed by 119 // Dlahqr because of insufficient subdiagonal scratch space. 120 // This is a hard limit. 121 ntiny = 15 122 // Exceptional deflation windows: try to cure rare slow 123 // convergence by varying the size of the deflation window after 124 // kexnw iterations. 125 kexnw = 5 126 // Exceptional shifts: try to cure rare slow convergence with 127 // ad-hoc exceptional shifts every kexsh iterations. 128 kexsh = 6 129 130 // See https://github.com/gonum/lapack/pull/151#discussion_r68162802 131 // and the surrounding discussion for an explanation where these 132 // constants come from. 133 // TODO(vladimir-ch): Similar constants for exceptional shifts 134 // are used also in dlahqr.go. The first constant is different 135 // there, it is equal to 3. Why? And does it matter? 136 wilk1 = 0.75 137 wilk2 = -0.4375 138 ) 139 140 switch { 141 case n < 0: 142 panic(nLT0) 143 case ilo < 0 || max(0, n-1) < ilo: 144 panic(badIlo) 145 case ihi < min(ilo, n-1) || n <= ihi: 146 panic(badIhi) 147 case ldh < max(1, n): 148 panic(badLdH) 149 case wantz && (iloz < 0 || ilo < iloz): 150 panic(badIloz) 151 case wantz && (ihiz < ihi || n <= ihiz): 152 panic(badIhiz) 153 case ldz < 1, wantz && ldz < n: 154 panic(badLdZ) 155 case lwork < 1 && lwork != -1: 156 panic(badLWork) 157 // TODO(vladimir-ch): Enable if and when we figure out what the minimum 158 // necessary lwork value is. Dlaqr04 says that the minimum is n which 159 // clashes with Dlaqr23's opinion about optimal work when nw <= 2 160 // (independent of n). 161 // case lwork < n && n > ntiny && lwork != -1: 162 // panic(badLWork) 163 case len(work) < max(1, lwork): 164 panic(shortWork) 165 case recur < 0: 166 panic(recurLT0) 167 } 168 169 // Quick return. 170 if n == 0 { 171 work[0] = 1 172 return 0 173 } 174 175 if lwork != -1 { 176 switch { 177 case len(h) < (n-1)*ldh+n: 178 panic(shortH) 179 case len(wr) != ihi+1: 180 panic(badLenWr) 181 case len(wi) != ihi+1: 182 panic(badLenWi) 183 case wantz && len(z) < (n-1)*ldz+n: 184 panic(shortZ) 185 case ilo > 0 && h[ilo*ldh+ilo-1] != 0: 186 panic(notIsolated) 187 case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0: 188 panic(notIsolated) 189 } 190 } 191 192 if n <= ntiny { 193 // Tiny matrices must use Dlahqr. 194 if lwork == -1 { 195 work[0] = 1 196 return 0 197 } 198 return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz) 199 } 200 201 // Use small bulge multi-shift QR with aggressive early deflation on 202 // larger-than-tiny matrices. 203 var jbcmpz string 204 if wantt { 205 jbcmpz = "S" 206 } else { 207 jbcmpz = "E" 208 } 209 if wantz { 210 jbcmpz += "V" 211 } else { 212 jbcmpz += "N" 213 } 214 215 var fname string 216 if recur > 0 { 217 fname = "DLAQR0" 218 } else { 219 fname = "DLAQR4" 220 } 221 // nwr is the recommended deflation window size. n is greater than ntiny, 222 // so there is enough subdiagonal workspace for nwr >= 2 as required. 223 // (In fact, there is enough subdiagonal space for nwr >= 4.) 224 // TODO(vladimir-ch): If there is enough space for nwr >= 4, should we 225 // use it? 226 nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork) 227 nwr = max(2, nwr) 228 nwr = min(ihi-ilo+1, min((n-1)/3, nwr)) 229 230 // nsr is the recommended number of simultaneous shifts. n is greater than 231 // ntiny, so there is enough subdiagonal workspace for nsr to be even and 232 // greater than or equal to two as required. 233 nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork) 234 nsr = min(nsr, min((n-3)/6, ihi-ilo)) 235 nsr = max(2, nsr&^1) 236 237 // Workspace query call to Dlaqr23. 238 impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz, ihiz, z, ldz, 239 wr, wi, h, ldh, n, h, ldh, n, h, ldh, work, -1, recur) 240 // Optimal workspace is max(Dlaqr5, Dlaqr23). 241 lwkopt := max(3*nsr/2, int(work[0])) 242 // Quick return in case of workspace query. 243 if lwork == -1 { 244 work[0] = float64(lwkopt) 245 return 0 246 } 247 248 // Dlahqr/Dlaqr04 crossover point. 249 nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork) 250 nmin = max(ntiny, nmin) 251 252 // Nibble determines when to skip a multi-shift QR sweep (Dlaqr5). 253 nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork) 254 nibble = max(0, nibble) 255 256 // Computation mode of far-from-diagonal orthogonal updates in Dlaqr5. 257 kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork) 258 kacc22 = max(0, min(kacc22, 2)) 259 260 // nwmax is the largest possible deflation window for which there is 261 // sufficient workspace. 262 nwmax := min((n-1)/3, lwork/2) 263 nw := nwmax // Start with maximum deflation window size. 264 265 // nsmax is the largest number of simultaneous shifts for which there is 266 // sufficient workspace. 267 nsmax := min((n-3)/6, 2*lwork/3) &^ 1 268 269 ndfl := 1 // Number of iterations since last deflation. 270 ndec := 0 // Deflation window size decrement. 271 272 // Main loop. 273 var ( 274 itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1)) 275 it = 0 276 ) 277 for kbot := ihi; kbot >= ilo; { 278 if it == itmax { 279 unconverged = kbot + 1 280 break 281 } 282 it++ 283 284 // Locate active block. 285 ktop := ilo 286 for k := kbot; k >= ilo+1; k-- { 287 if h[k*ldh+k-1] == 0 { 288 ktop = k 289 break 290 } 291 } 292 293 // Select deflation window size nw. 294 // 295 // Typical Case: 296 // If possible and advisable, nibble the entire active block. 297 // If not, use size min(nwr,nwmax) or min(nwr+1,nwmax) 298 // depending upon which has the smaller corresponding 299 // subdiagonal entry (a heuristic). 300 // 301 // Exceptional Case: 302 // If there have been no deflations in kexnw or more 303 // iterations, then vary the deflation window size. At first, 304 // because larger windows are, in general, more powerful than 305 // smaller ones, rapidly increase the window to the maximum 306 // possible. Then, gradually reduce the window size. 307 nh := kbot - ktop + 1 308 nwupbd := min(nh, nwmax) 309 if ndfl < kexnw { 310 nw = min(nwupbd, nwr) 311 } else { 312 nw = min(nwupbd, 2*nw) 313 } 314 if nw < nwmax { 315 if nw >= nh-1 { 316 nw = nh 317 } else { 318 kwtop := kbot - nw + 1 319 if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) { 320 nw++ 321 } 322 } 323 } 324 if ndfl < kexnw { 325 ndec = -1 326 } else if ndec >= 0 || nw >= nwupbd { 327 ndec++ 328 if nw-ndec < 2 { 329 ndec = 0 330 } 331 nw -= ndec 332 } 333 334 // Split workspace under the subdiagonal of H into: 335 // - an nw×nw work array V in the lower left-hand corner, 336 // - an nw×nhv horizontal work array along the bottom edge (nhv 337 // must be at least nw but more is better), 338 // - an nve×nw vertical work array along the left-hand-edge 339 // (nhv can be any positive integer but more is better). 340 kv := n - nw 341 kt := nw 342 kwv := nw + 1 343 nhv := n - kwv - kt 344 // Aggressive early deflation. 345 ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, 346 h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1], 347 h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur) 348 349 // Adjust kbot accounting for new deflations. 350 kbot -= ld 351 // ks points to the shifts. 352 ks := kbot - ls + 1 353 354 // Skip an expensive QR sweep if there is a (partly heuristic) 355 // reason to expect that many eigenvalues will deflate without 356 // it. Here, the QR sweep is skipped if many eigenvalues have 357 // just been deflated or if the remaining active block is small. 358 if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) { 359 // ld is positive, note progress. 360 ndfl = 1 361 continue 362 } 363 364 // ns is the nominal number of simultaneous shifts. This may be 365 // lowered (slightly) if Dlaqr23 did not provide that many 366 // shifts. 367 ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1 368 369 // If there have been no deflations in a multiple of kexsh 370 // iterations, then try exceptional shifts. Otherwise use shifts 371 // provided by Dlaqr23 above or from the eigenvalues of a 372 // trailing principal submatrix. 373 if ndfl%kexsh == 0 { 374 ks = kbot - ns + 1 375 for i := kbot; i > max(ks, ktop+1); i -= 2 { 376 ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2]) 377 aa := wilk1*ss + h[i*ldh+i] 378 _, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ = 379 impl.Dlanv2(aa, ss, wilk2*ss, aa) 380 } 381 if ks == ktop { 382 wr[ks+1] = h[(ks+1)*ldh+ks+1] 383 wi[ks+1] = 0 384 wr[ks] = wr[ks+1] 385 wi[ks] = wi[ks+1] 386 } 387 } else { 388 // If we got ns/2 or fewer shifts, use Dlahqr or recur 389 // into Dlaqr04 on a trailing principal submatrix to get 390 // more. Since ns <= nsmax <=(n+6)/9, there is enough 391 // space below the subdiagonal to fit an ns×ns scratch 392 // array. 393 if kbot-ks+1 <= ns/2 { 394 ks = kbot - ns + 1 395 kt = n - ns 396 impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh) 397 if ns > nmin && recur > 0 { 398 ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh, 399 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1) 400 } else { 401 ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh, 402 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 1) 403 } 404 // In case of a rare QR failure use eigenvalues 405 // of the trailing 2×2 principal submatrix. 406 if ks >= kbot { 407 aa := h[(kbot-1)*ldh+kbot-1] 408 bb := h[(kbot-1)*ldh+kbot] 409 cc := h[kbot*ldh+kbot-1] 410 dd := h[kbot*ldh+kbot] 411 _, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ = 412 impl.Dlanv2(aa, bb, cc, dd) 413 ks = kbot - 1 414 } 415 } 416 417 if kbot-ks+1 > ns { 418 // Sorting the shifts helps a little. Bubble 419 // sort keeps complex conjugate pairs together. 420 sorted := false 421 for k := kbot; k > ks; k-- { 422 if sorted { 423 break 424 } 425 sorted = true 426 for i := ks; i < k; i++ { 427 if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) { 428 continue 429 } 430 sorted = false 431 wr[i], wr[i+1] = wr[i+1], wr[i] 432 wi[i], wi[i+1] = wi[i+1], wi[i] 433 } 434 } 435 } 436 437 // Shuffle shifts into pairs of real shifts and pairs of 438 // complex conjugate shifts using the fact that complex 439 // conjugate shifts are already adjacent to one another. 440 // TODO(vladimir-ch): The shuffling here could probably 441 // be removed but I'm not sure right now and it's safer 442 // to leave it. 443 for i := kbot; i > ks+1; i -= 2 { 444 if wi[i] == -wi[i-1] { 445 continue 446 } 447 wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i] 448 wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i] 449 } 450 } 451 452 // If there are only two shifts and both are real, then use only one. 453 if kbot-ks+1 == 2 && wi[kbot] == 0 { 454 if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) { 455 wr[kbot-1] = wr[kbot] 456 } else { 457 wr[kbot] = wr[kbot-1] 458 } 459 } 460 461 // Use up to ns of the smallest magnitude shifts. If there 462 // aren't ns shifts available, then use them all, possibly 463 // dropping one to make the number of shifts even. 464 ns = min(ns, kbot-ks+1) &^ 1 465 ks = kbot - ns + 1 466 467 // Split workspace under the subdiagonal into: 468 // - a kdu×kdu work array U in the lower left-hand-corner, 469 // - a kdu×nhv horizontal work array WH along the bottom edge 470 // (nhv must be at least kdu but more is better), 471 // - an nhv×kdu vertical work array WV along the left-hand-edge 472 // (nhv must be at least kdu but more is better). 473 kdu := 2 * ns 474 ku := n - kdu 475 kwh := kdu 476 kwv = kdu + 3 477 nhv = n - kwv - kdu 478 // Small-bulge multi-shift QR sweep. 479 impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns, 480 wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz, 481 work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh) 482 483 // Note progress (or the lack of it). 484 if ld > 0 { 485 ndfl = 1 486 } else { 487 ndfl++ 488 } 489 } 490 491 work[0] = float64(lwkopt) 492 return unconverged 493 }