github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dlaqr04.go (about) 1 // Copyright ©2016 The gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package native 6 7 import ( 8 "math" 9 10 "github.com/gonum/blas" 11 ) 12 13 // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix 14 // H, and optionally the matrices T and Z from the Schur decomposition 15 // H = Z T Z^T 16 // where T is an upper quasi-triangular matrix (the Schur form), and Z is the 17 // orthogonal matrix of Schur vectors. 18 // 19 // wantt indicates whether the full Schur form T is required. If wantt is false, 20 // then only enough of H will be updated to preserve the eigenvalues. 21 // 22 // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it 23 // is true, the orthogonal similarity transformation will be accumulated into 24 // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced. 25 // 26 // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that 27 // 0 <= ilo <= ihi < n, if n > 0, 28 // ilo == 0 and ihi == -1, if n == 0, 29 // and the block must be isolated, that is, 30 // ilo == 0 or H[ilo,ilo-1] == 0, 31 // ihi == n-1 or H[ihi+1,ihi] == 0, 32 // otherwise Dlaqr04 will panic. 33 // 34 // wr and wi must have length ihi+1. 35 // 36 // iloz and ihiz specify the rows of Z to which transformations will be applied 37 // if wantz is true. It must hold that 38 // 0 <= iloz <= ilo, and ihi <= ihiz < n, 39 // otherwise Dlaqr04 will panic. 40 // 41 // work must have length at least lwork and lwork must be 42 // lwork >= 1, if n <= 11, 43 // lwork >= n, if n > 11, 44 // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for 45 // optimal performance. On return, work[0] will contain the optimal value of 46 // lwork. 47 // 48 // If lwork is -1, instead of performing Dlaqr04, the function only estimates the 49 // optimal workspace size and stores it into work[0]. Neither h nor z are 50 // accessed. 51 // 52 // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves 53 // as DLAQR0, for recur == 0 it behaves as DLAQR4. 54 // 55 // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1]. 56 // 57 // If unconverged is zero and wantt is true, H will contain on return the upper 58 // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks 59 // (corresponding to complex conjugate pairs of eigenvalues) will be returned in 60 // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0. 61 // 62 // If unconverged is zero and if wantt is false, the contents of h on return is 63 // unspecified. 64 // 65 // If unconverged is zero, all the eigenvalues have been computed and their real 66 // and imaginary parts will be stored on return in wr[ilo:ihi+1] and 67 // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex 68 // conjugate pair, they are stored in consecutive elements of wr and wi, say the 69 // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the 70 // eigenvalues are stored in the same order as on the diagonal of the Schur form 71 // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal 72 // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i]. 73 // 74 // If unconverged is positive, some eigenvalues have not converged, and 75 // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those 76 // eigenvalues which have been successfully computed. Failures are rare. 77 // 78 // If unconverged is positive and wantt is true, then on return 79 // (initial H)*U = U*(final H), (*) 80 // where U is an orthogonal matrix. The final H is upper Hessenberg and 81 // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular. 82 // 83 // If unconverged is positive and wantt is false, on return the remaining 84 // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix 85 // H[ilo:unconverged,ilo:unconverged]. 86 // 87 // If unconverged is positive and wantz is true, then on return 88 // (final Z) = (initial Z)*U, 89 // where U is the orthogonal matrix in (*) regardless of the value of wantt. 90 // 91 // References: 92 // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I: 93 // Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix 94 // Anal. Appl. 23(4) (2002), pp. 929—947 95 // URL: http://dx.doi.org/10.1137/S0895479801384573 96 // [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: 97 // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973 98 // URL: http://dx.doi.org/10.1137/S0895479801384585 99 // 100 // Dlaqr04 is an internal routine. It is exported for testing purposes. 101 func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) { 102 const ( 103 // Matrices of order ntiny or smaller must be processed by 104 // Dlahqr because of insufficient subdiagonal scratch space. 105 // This is a hard limit. 106 ntiny = 11 107 // Exceptional deflation windows: try to cure rare slow 108 // convergence by varying the size of the deflation window after 109 // kexnw iterations. 110 kexnw = 5 111 // Exceptional shifts: try to cure rare slow convergence with 112 // ad-hoc exceptional shifts every kexsh iterations. 113 kexsh = 6 114 115 // See https://github.com/gonum/lapack/pull/151#discussion_r68162802 116 // and the surrounding discussion for an explanation where these 117 // constants come from. 118 // TODO(vladimir-ch): Similar constants for exceptional shifts 119 // are used also in dlahqr.go. The first constant is different 120 // there, it is equal to 3. Why? And does it matter? 121 wilk1 = 0.75 122 wilk2 = -0.4375 123 ) 124 125 switch { 126 case ilo < 0 || max(0, n-1) < ilo: 127 panic(badIlo) 128 case ihi < min(ilo, n-1) || n <= ihi: 129 panic(badIhi) 130 case lwork < 1 && n <= ntiny && lwork != -1: 131 panic(badWork) 132 // TODO(vladimir-ch): Enable if and when we figure out what the minimum 133 // necessary lwork value is. Dlaqr04 says that the minimum is n which 134 // clashes with Dlaqr23's opinion about optimal work when nw <= 2 135 // (independent of n). 136 // case lwork < n && n > ntiny && lwork != -1: 137 // panic(badWork) 138 case len(work) < lwork: 139 panic(shortWork) 140 case recur < 0: 141 panic("lapack: recur is negative") 142 } 143 if wantz { 144 if iloz < 0 || ilo < iloz { 145 panic("lapack: invalid value of iloz") 146 } 147 if ihiz < ihi || n <= ihiz { 148 panic("lapack: invalid value of ihiz") 149 } 150 } 151 if lwork != -1 { 152 checkMatrix(n, n, h, ldh) 153 if wantz { 154 checkMatrix(n, n, z, ldz) 155 } 156 switch { 157 case ilo > 0 && h[ilo*ldh+ilo-1] != 0: 158 panic("lapack: block not isolated") 159 case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0: 160 panic("lapack: block not isolated") 161 case len(wr) != ihi+1: 162 panic("lapack: bad length of wr") 163 case len(wi) != ihi+1: 164 panic("lapack: bad length of wi") 165 } 166 } 167 168 // Quick return. 169 if n == 0 { 170 work[0] = 1 171 return 0 172 } 173 174 if n <= ntiny { 175 // Tiny matrices must use Dlahqr. 176 work[0] = 1 177 if lwork == -1 { 178 return 0 179 } 180 return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz) 181 } 182 183 // Use small bulge multi-shift QR with aggressive early deflation on 184 // larger-than-tiny matrices. 185 var jbcmpz string 186 if wantt { 187 jbcmpz = "S" 188 } else { 189 jbcmpz = "E" 190 } 191 if wantz { 192 jbcmpz += "V" 193 } else { 194 jbcmpz += "N" 195 } 196 197 var fname string 198 if recur > 0 { 199 fname = "DLAQR0" 200 } else { 201 fname = "DLAQR4" 202 } 203 // nwr is the recommended deflation window size. n is greater than 11, 204 // so there is enough subdiagonal workspace for nwr >= 2 as required. 205 // (In fact, there is enough subdiagonal space for nwr >= 3.) 206 // TODO(vladimir-ch): If there is enough space for nwr >= 3, should we 207 // use it? 208 nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork) 209 nwr = max(2, nwr) 210 nwr = min(ihi-ilo+1, min((n-1)/3, nwr)) 211 212 // nsr is the recommended number of simultaneous shifts. n is greater 213 // than 11, so there is enough subdiagonal workspace for nsr to be even 214 // and greater than or equal to two as required. 215 nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork) 216 nsr = min(nsr, min((n+6)/9, ihi-ilo)) 217 nsr = max(2, nsr&^1) 218 219 // Workspace query call to Dlaqr23. 220 impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0, 221 nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, recur) 222 // Optimal workspace is max(Dlaqr5, Dlaqr23). 223 lwkopt := max(3*nsr/2, int(work[0])) 224 // Quick return in case of workspace query. 225 if lwork == -1 { 226 work[0] = float64(lwkopt) 227 return 0 228 } 229 230 // Dlahqr/Dlaqr04 crossover point. 231 nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork) 232 nmin = max(ntiny, nmin) 233 234 // Nibble determines when to skip a multi-shift QR sweep (Dlaqr5). 235 nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork) 236 nibble = max(0, nibble) 237 238 // Computation mode of far-from-diagonal orthogonal updates in Dlaqr5. 239 kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork) 240 kacc22 = max(0, min(kacc22, 2)) 241 242 // nwmax is the largest possible deflation window for which there is 243 // sufficient workspace. 244 nwmax := min((n-1)/3, lwork/2) 245 nw := nwmax // Start with maximum deflation window size. 246 247 // nsmax is the largest number of simultaneous shifts for which there is 248 // sufficient workspace. 249 nsmax := min((n+6)/9, 2*lwork/3) &^ 1 250 251 ndfl := 1 // Number of iterations since last deflation. 252 ndec := 0 // Deflation window size decrement. 253 254 // Main loop. 255 var ( 256 itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1)) 257 it = 0 258 ) 259 for kbot := ihi; kbot >= ilo; { 260 if it == itmax { 261 unconverged = kbot + 1 262 break 263 } 264 it++ 265 266 // Locate active block. 267 ktop := ilo 268 for k := kbot; k >= ilo+1; k-- { 269 if h[k*ldh+k-1] == 0 { 270 ktop = k 271 break 272 } 273 } 274 275 // Select deflation window size nw. 276 // 277 // Typical Case: 278 // If possible and advisable, nibble the entire active block. 279 // If not, use size min(nwr,nwmax) or min(nwr+1,nwmax) 280 // depending upon which has the smaller corresponding 281 // subdiagonal entry (a heuristic). 282 // 283 // Exceptional Case: 284 // If there have been no deflations in kexnw or more 285 // iterations, then vary the deflation window size. At first, 286 // because larger windows are, in general, more powerful than 287 // smaller ones, rapidly increase the window to the maximum 288 // possible. Then, gradually reduce the window size. 289 nh := kbot - ktop + 1 290 nwupbd := min(nh, nwmax) 291 if ndfl < kexnw { 292 nw = min(nwupbd, nwr) 293 } else { 294 nw = min(nwupbd, 2*nw) 295 } 296 if nw < nwmax { 297 if nw >= nh-1 { 298 nw = nh 299 } else { 300 kwtop := kbot - nw + 1 301 if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) { 302 nw++ 303 } 304 } 305 } 306 if ndfl < kexnw { 307 ndec = -1 308 } else if ndec >= 0 || nw >= nwupbd { 309 ndec++ 310 if nw-ndec < 2 { 311 ndec = 0 312 } 313 nw -= ndec 314 } 315 316 // Split workspace under the subdiagonal of H into: 317 // - an nw×nw work array V in the lower left-hand corner, 318 // - an nw×nhv horizontal work array along the bottom edge (nhv 319 // must be at least nw but more is better), 320 // - an nve×nw vertical work array along the left-hand-edge 321 // (nhv can be any positive integer but more is better). 322 kv := n - nw 323 kt := nw 324 kwv := nw + 1 325 nhv := n - kwv - kt 326 // Aggressive early deflation. 327 ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw, 328 h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1], 329 h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur) 330 331 // Adjust kbot accounting for new deflations. 332 kbot -= ld 333 // ks points to the shifts. 334 ks := kbot - ls + 1 335 336 // Skip an expensive QR sweep if there is a (partly heuristic) 337 // reason to expect that many eigenvalues will deflate without 338 // it. Here, the QR sweep is skipped if many eigenvalues have 339 // just been deflated or if the remaining active block is small. 340 if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) { 341 // ld is positive, note progress. 342 ndfl = 1 343 continue 344 } 345 346 // ns is the nominal number of simultaneous shifts. This may be 347 // lowered (slightly) if Dlaqr23 did not provide that many 348 // shifts. 349 ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1 350 351 // If there have been no deflations in a multiple of kexsh 352 // iterations, then try exceptional shifts. Otherwise use shifts 353 // provided by Dlaqr23 above or from the eigenvalues of a 354 // trailing principal submatrix. 355 if ndfl%kexsh == 0 { 356 ks = kbot - ns + 1 357 for i := kbot; i > max(ks, ktop+1); i -= 2 { 358 ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2]) 359 aa := wilk1*ss + h[i*ldh+i] 360 _, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ = 361 impl.Dlanv2(aa, ss, wilk2*ss, aa) 362 } 363 if ks == ktop { 364 wr[ks+1] = h[(ks+1)*ldh+ks+1] 365 wi[ks+1] = 0 366 wr[ks] = wr[ks+1] 367 wi[ks] = wi[ks+1] 368 } 369 } else { 370 // If we got ns/2 or fewer shifts, use Dlahqr or recur 371 // into Dlaqr04 on a trailing principal submatrix to get 372 // more. Since ns <= nsmax <=(n+6)/9, there is enough 373 // space below the subdiagonal to fit an ns×ns scratch 374 // array. 375 if kbot-ks+1 <= ns/2 { 376 ks = kbot - ns + 1 377 kt = n - ns 378 impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh) 379 if ns > nmin && recur > 0 { 380 ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh, 381 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1) 382 } else { 383 ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh, 384 wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0) 385 } 386 // In case of a rare QR failure use eigenvalues 387 // of the trailing 2×2 principal submatrix. 388 if ks >= kbot { 389 aa := h[(kbot-1)*ldh+kbot-1] 390 bb := h[(kbot-1)*ldh+kbot] 391 cc := h[kbot*ldh+kbot-1] 392 dd := h[kbot*ldh+kbot] 393 _, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ = 394 impl.Dlanv2(aa, bb, cc, dd) 395 ks = kbot - 1 396 } 397 } 398 399 if kbot-ks+1 > ns { 400 // Sorting the shifts helps a little. Bubble 401 // sort keeps complex conjugate pairs together. 402 sorted := false 403 for k := kbot; k > ks; k-- { 404 if sorted { 405 break 406 } 407 sorted = true 408 for i := ks; i < k; i++ { 409 if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) { 410 continue 411 } 412 sorted = false 413 wr[i], wr[i+1] = wr[i+1], wr[i] 414 wi[i], wi[i+1] = wi[i+1], wi[i] 415 } 416 } 417 } 418 419 // Shuffle shifts into pairs of real shifts and pairs of 420 // complex conjugate shifts using the fact that complex 421 // conjugate shifts are already adjacent to one another. 422 // TODO(vladimir-ch): The shuffling here could probably 423 // be removed but I'm not sure right now and it's safer 424 // to leave it. 425 for i := kbot; i > ks+1; i -= 2 { 426 if wi[i] == -wi[i-1] { 427 continue 428 } 429 wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i] 430 wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i] 431 } 432 } 433 434 // If there are only two shifts and both are real, then use only one. 435 if kbot-ks+1 == 2 && wi[kbot] == 0 { 436 if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) { 437 wr[kbot-1] = wr[kbot] 438 } else { 439 wr[kbot] = wr[kbot-1] 440 } 441 } 442 443 // Use up to ns of the the smallest magnitude shifts. If there 444 // aren't ns shifts available, then use them all, possibly 445 // dropping one to make the number of shifts even. 446 ns = min(ns, kbot-ks+1) &^ 1 447 ks = kbot - ns + 1 448 449 // Split workspace under the subdiagonal into: 450 // - a kdu×kdu work array U in the lower left-hand-corner, 451 // - a kdu×nhv horizontal work array WH along the bottom edge 452 // (nhv must be at least kdu but more is better), 453 // - an nhv×kdu vertical work array WV along the left-hand-edge 454 // (nhv must be at least kdu but more is better). 455 kdu := 3*ns - 3 456 ku := n - kdu 457 kwh := kdu 458 kwv = kdu + 3 459 nhv = n - kwv - kdu 460 // Small-bulge multi-shift QR sweep. 461 impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns, 462 wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz, 463 work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh) 464 465 // Note progress (or the lack of it). 466 if ld > 0 { 467 ndfl = 1 468 } else { 469 ndfl++ 470 } 471 } 472 473 work[0] = float64(lwkopt) 474 return unconverged 475 }