github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dlaqr23.go (about) 1 // Copyright ©2016 The gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package native 6 7 import ( 8 "math" 9 10 "github.com/gonum/blas" 11 "github.com/gonum/blas/blas64" 12 "github.com/gonum/lapack" 13 ) 14 15 // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper 16 // Hessenberg matrix to detect and deflate fully converged eigenvalues from a 17 // trailing principal submatrix using aggressive early deflation [1]. 18 // 19 // On return, H will be overwritten by a new Hessenberg matrix that is a 20 // perturbation of an orthogonal similarity transformation of H. It is hoped 21 // that on output H will have many zero subdiagonal entries. 22 // 23 // If wantt is true, the matrix H will be fully updated so that the 24 // quasi-triangular Schur factor can be computed. If wantt is false, then only 25 // enough of H will be updated to preserve the eigenvalues. 26 // 27 // If wantz is true, the orthogonal similarity transformation will be 28 // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced. 29 // 30 // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal 31 // of H. It must hold that 32 // 0 <= ilo <= ihi < n, if n > 0, 33 // ilo == 0 and ihi == -1, if n == 0, 34 // and the block must be isolated, that is, it must hold that 35 // ktop == 0 or H[ktop,ktop-1] == 0, 36 // kbot == n-1 or H[kbot+1,kbot] == 0, 37 // otherwise Dlaqr23 will panic. 38 // 39 // nw is the deflation window size. It must hold that 40 // 0 <= nw <= kbot-ktop+1, 41 // otherwise Dlaqr23 will panic. 42 // 43 // iloz and ihiz specify the rows of the n×n matrix Z to which transformations 44 // will be applied if wantz is true. It must hold that 45 // 0 <= iloz <= ktop, and kbot <= ihiz < n, 46 // otherwise Dlaqr23 will panic. 47 // 48 // sr and si must have length kbot+1, otherwise Dlaqr23 will panic. 49 // 50 // v and ldv represent an nw×nw work matrix. 51 // t and ldt represent an nw×nh work matrix, and nh must be at least nw. 52 // wv and ldwv represent an nv×nw work matrix. 53 // 54 // work must have length at least lwork and lwork must be at least max(1,2*nw), 55 // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater 56 // efficiency. On return, work[0] will contain the optimal value of lwork. 57 // 58 // If lwork is -1, instead of performing Dlaqr23, the function only estimates the 59 // optimal workspace size and stores it into work[0]. Neither h nor z are 60 // accessed. 61 // 62 // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves 63 // as DLAQR3, for recur == 0 it behaves as DLAQR2. 64 // 65 // On return, ns and nd will contain respectively the number of unconverged 66 // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in 67 // sr and si. 68 // 69 // On return, the real and imaginary parts of approximate eigenvalues that may 70 // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1] 71 // and si[kbot-nd-ns+1:kbot-nd+1]. 72 // 73 // On return, the real and imaginary parts of converged eigenvalues will be 74 // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1]. 75 // 76 // References: 77 // [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II: 78 // Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973 79 // URL: http://dx.doi.org/10.1137/S0895479801384585 80 // 81 func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) { 82 switch { 83 case ktop < 0 || max(0, n-1) < ktop: 84 panic("lapack: invalid value of ktop") 85 case kbot < min(ktop, n-1) || n <= kbot: 86 panic("lapack: invalid value of kbot") 87 case (nw < 0 || kbot-ktop+1 < nw) && lwork != -1: 88 panic("lapack: invalid value of nw") 89 case nh < nw: 90 panic("lapack: invalid value of nh") 91 case lwork < max(1, 2*nw) && lwork != -1: 92 panic(badWork) 93 case len(work) < lwork: 94 panic(shortWork) 95 case recur < 0: 96 panic("lapack: recur is negative") 97 } 98 if wantz { 99 switch { 100 case iloz < 0 || ktop < iloz: 101 panic("lapack: invalid value of iloz") 102 case ihiz < kbot || n <= ihiz: 103 panic("lapack: invalid value of ihiz") 104 } 105 } 106 if lwork != -1 { 107 // Check input slices only if not doing workspace query. 108 checkMatrix(n, n, h, ldh) 109 checkMatrix(nw, nw, v, ldv) 110 checkMatrix(nw, nh, t, ldt) 111 checkMatrix(nv, nw, wv, ldwv) 112 if wantz { 113 checkMatrix(n, n, z, ldz) 114 } 115 switch { 116 case ktop > 0 && h[ktop*ldh+ktop-1] != 0: 117 panic("lapack: block not isolated") 118 case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0: 119 panic("lapack: block not isolated") 120 case len(sr) != kbot+1: 121 panic("lapack: bad length of sr") 122 case len(si) != kbot+1: 123 panic("lapack: bad length of si") 124 } 125 } 126 127 // Quick return for zero window size. 128 if nw == 0 { 129 work[0] = 1 130 return 0, 0 131 } 132 133 // LAPACK code does not enforce the documented behavior 134 // nw <= kbot-ktop+1 135 // but we do (we panic above). 136 jw := nw 137 lwkopt := max(1, 2*nw) 138 if jw > 2 { 139 // Workspace query call to Dgehrd. 140 impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1) 141 lwk1 := int(work[0]) 142 // Workspace query call to Dormhr. 143 impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1) 144 lwk2 := int(work[0]) 145 if recur > 0 { 146 // Workspace query call to Dlaqr04. 147 impl.Dlaqr04(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1, recur-1) 148 lwk3 := int(work[0]) 149 // Optimal workspace. 150 lwkopt = max(jw+max(lwk1, lwk2), lwk3) 151 } else { 152 // Optimal workspace. 153 lwkopt = jw + max(lwk1, lwk2) 154 } 155 } 156 // Quick return in case of workspace query. 157 if lwork == -1 { 158 work[0] = float64(lwkopt) 159 return 0, 0 160 } 161 162 // Machine constants. 163 ulp := dlamchP 164 smlnum := float64(n) / ulp * dlamchS 165 166 // Setup deflation window. 167 var s float64 168 kwtop := kbot - jw + 1 169 if kwtop != ktop { 170 s = h[kwtop*ldh+kwtop-1] 171 } 172 if kwtop == kbot { 173 // 1×1 deflation window. 174 sr[kwtop] = h[kwtop*ldh+kwtop] 175 si[kwtop] = 0 176 ns = 1 177 nd = 0 178 if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) { 179 ns = 0 180 nd = 1 181 if kwtop > ktop { 182 h[kwtop*ldh+kwtop-1] = 0 183 } 184 } 185 work[0] = 1 186 return ns, nd 187 } 188 189 // Convert to spike-triangular form. In case of a rare QR failure, this 190 // routine continues to do aggressive early deflation using that part of 191 // the deflation window that converged using infqr here and there to 192 // keep track. 193 impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt) 194 bi := blas64.Implementation() 195 bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1) 196 impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv) 197 nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork) 198 var infqr int 199 if recur > 0 && jw > nmin { 200 infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1) 201 } else { 202 infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv) 203 } 204 // Note that ilo == 0 which conveniently coincides with the success 205 // value of infqr, that is, infqr as an index always points to the first 206 // converged eigenvalue. 207 208 // Dtrexc needs a clean margin near the diagonal. 209 for j := 0; j < jw-3; j++ { 210 t[(j+2)*ldt+j] = 0 211 t[(j+3)*ldt+j] = 0 212 } 213 if jw >= 3 { 214 t[(jw-1)*ldt+jw-3] = 0 215 } 216 217 ns = jw 218 ilst := infqr 219 // Deflation detection loop. 220 for ilst < ns { 221 bulge := false 222 if ns >= 2 { 223 bulge = t[(ns-1)*ldt+ns-2] != 0 224 } 225 if !bulge { 226 // Real eigenvalue. 227 abst := math.Abs(t[(ns-1)*ldt+ns-1]) 228 if abst == 0 { 229 abst = math.Abs(s) 230 } 231 if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) { 232 // Deflatable. 233 ns-- 234 } else { 235 // Undeflatable, move it up out of the way. 236 // Dtrexc can not fail in this case. 237 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) 238 ilst++ 239 } 240 continue 241 } 242 // Complex conjugate pair. 243 abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1])) 244 if abst == 0 { 245 abst = math.Abs(s) 246 } 247 if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) { 248 // Deflatable. 249 ns -= 2 250 } else { 251 // Undeflatable, move them up out of the way. 252 // Dtrexc does the right thing with ilst in case of a 253 // rare exchange failure. 254 _, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work) 255 ilst += 2 256 } 257 } 258 259 // Return to Hessenberg form. 260 if ns == 0 { 261 s = 0 262 } 263 if ns < jw { 264 // Sorting diagonal blocks of T improves accuracy for graded 265 // matrices. Bubble sort deals well with exchange failures. 266 sorted := false 267 i := ns 268 for !sorted { 269 sorted = true 270 kend := i - 1 271 i = infqr 272 var k int 273 if i == ns-1 || t[(i+1)*ldt+i] == 0 { 274 k = i + 1 275 } else { 276 k = i + 2 277 } 278 for k <= kend { 279 var evi float64 280 if k == i+1 { 281 evi = math.Abs(t[i*ldt+i]) 282 } else { 283 evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1])) 284 } 285 286 var evk float64 287 if k == kend || t[(k+1)*ldt+k] == 0 { 288 evk = math.Abs(t[k*ldt+k]) 289 } else { 290 evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1])) 291 } 292 293 if evi >= evk { 294 i = k 295 } else { 296 sorted = false 297 _, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work) 298 if ok { 299 i = ilst 300 } else { 301 i = k 302 } 303 } 304 if i == kend || t[(i+1)*ldt+i] == 0 { 305 k = i + 1 306 } else { 307 k = i + 2 308 } 309 } 310 } 311 } 312 313 // Restore shift/eigenvalue array from T. 314 for i := jw - 1; i >= infqr; { 315 if i == infqr || t[i*ldt+i-1] == 0 { 316 sr[kwtop+i] = t[i*ldt+i] 317 si[kwtop+i] = 0 318 i-- 319 continue 320 } 321 aa := t[(i-1)*ldt+i-1] 322 bb := t[(i-1)*ldt+i] 323 cc := t[i*ldt+i-1] 324 dd := t[i*ldt+i] 325 _, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd) 326 i -= 2 327 } 328 329 if ns < jw || s == 0 { 330 if ns > 1 && s != 0 { 331 // Reflect spike back into lower triangle. 332 bi.Dcopy(ns, v[:ns], 1, work[:ns], 1) 333 _, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1) 334 work[0] = 1 335 impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt) 336 impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:]) 337 impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:]) 338 impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:]) 339 impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw) 340 } 341 342 // Copy updated reduced window into place. 343 if kwtop > 0 { 344 h[kwtop*ldh+kwtop-1] = s * v[0] 345 } 346 impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh) 347 bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1) 348 349 // Accumulate orthogonal matrix in order to update H and Z, if 350 // requested. 351 if ns > 1 && s != 0 { 352 // work[:ns-1] contains the elementary reflectors stored 353 // by a call to Dgehrd above. 354 impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1, 355 t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw) 356 } 357 358 // Update vertical slab in H. 359 var ltop int 360 if !wantt { 361 ltop = ktop 362 } 363 for krow := ltop; krow < kwtop; krow += nv { 364 kln := min(nv, kwtop-krow) 365 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, 366 1, h[krow*ldh+kwtop:], ldh, v, ldv, 367 0, wv, ldwv) 368 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh) 369 } 370 371 // Update horizontal slab in H. 372 if wantt { 373 for kcol := kbot + 1; kcol < n; kcol += nh { 374 kln := min(nh, n-kcol) 375 bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw, 376 1, v, ldv, h[kwtop*ldh+kcol:], ldh, 377 0, t, ldt) 378 impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh) 379 } 380 } 381 382 // Update vertical slab in Z. 383 if wantz { 384 for krow := iloz; krow <= ihiz; krow += nv { 385 kln := min(nv, ihiz-krow+1) 386 bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw, 387 1, z[krow*ldz+kwtop:], ldz, v, ldv, 388 0, wv, ldwv) 389 impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz) 390 } 391 } 392 } 393 394 // The number of deflations. 395 nd = jw - ns 396 // Shifts are converged eigenvalues that could not be deflated. 397 // Subtracting infqr from the spike length takes care of the case of a 398 // rare QR failure while calculating eigenvalues of the deflation 399 // window. 400 ns -= infqr 401 work[0] = float64(lwkopt) 402 return ns, nd 403 }