github.com/gopherd/gonum@v0.0.4/lapack/gonum/dlaqr23.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/gopherd/gonum/blas"
    11  	"github.com/gopherd/gonum/blas/blas64"
    12  	"github.com/gopherd/gonum/lapack"
    13  )
    14  
    15  // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
    16  // Hessenberg matrix to detect and deflate fully converged eigenvalues from a
    17  // trailing principal submatrix using aggressive early deflation [1].
    18  //
    19  // On return, H will be overwritten by a new Hessenberg matrix that is a
    20  // perturbation of an orthogonal similarity transformation of H. It is hoped
    21  // that on output H will have many zero subdiagonal entries.
    22  //
    23  // If wantt is true, the matrix H will be fully updated so that the
    24  // quasi-triangular Schur factor can be computed. If wantt is false, then only
    25  // enough of H will be updated to preserve the eigenvalues.
    26  //
    27  // If wantz is true, the orthogonal similarity transformation will be
    28  // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
    29  //
    30  // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
    31  // of H. It must hold that
    32  //  0 <= ilo <= ihi < n     if n > 0,
    33  //  ilo == 0 and ihi == -1  if n == 0,
    34  // and the block must be isolated, that is, it must hold that
    35  //  ktop == 0   or H[ktop,ktop-1] == 0,
    36  //  kbot == n-1 or H[kbot+1,kbot] == 0,
    37  // otherwise Dlaqr23 will panic.
    38  //
    39  // nw is the deflation window size. It must hold that
    40  //  0 <= nw <= kbot-ktop+1,
    41  // otherwise Dlaqr23 will panic.
    42  //
    43  // iloz and ihiz specify the rows of the n×n matrix Z to which transformations
    44  // will be applied if wantz is true. It must hold that
    45  //  0 <= iloz <= ktop,  and  kbot <= ihiz < n,
    46  // otherwise Dlaqr23 will panic.
    47  //
    48  // sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
    49  //
    50  // v and ldv represent an nw×nw work matrix.
    51  // t and ldt represent an nw×nh work matrix, and nh must be at least nw.
    52  // wv and ldwv represent an nv×nw work matrix.
    53  //
    54  // work must have length at least lwork and lwork must be at least max(1,2*nw),
    55  // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
    56  // efficiency. On return, work[0] will contain the optimal value of lwork.
    57  //
    58  // If lwork is -1, instead of performing Dlaqr23, the function only estimates the
    59  // optimal workspace size and stores it into work[0]. Neither h nor z are
    60  // accessed.
    61  //
    62  // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
    63  // as DLAQR3, for recur == 0 it behaves as DLAQR2.
    64  //
    65  // On return, ns and nd will contain respectively the number of unconverged
    66  // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
    67  // sr and si.
    68  //
    69  // On return, the real and imaginary parts of approximate eigenvalues that may
    70  // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
    71  // and si[kbot-nd-ns+1:kbot-nd+1].
    72  //
    73  // On return, the real and imaginary parts of converged eigenvalues will be
    74  // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
    75  //
    76  // References:
    77  //  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
    78  //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
    79  //      URL: http://dx.doi.org/10.1137/S0895479801384585
    80  //
    81  func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
    82  	switch {
    83  	case n < 0:
    84  		panic(nLT0)
    85  	case ktop < 0 || max(0, n-1) < ktop:
    86  		panic(badKtop)
    87  	case kbot < min(ktop, n-1) || n <= kbot:
    88  		panic(badKbot)
    89  	case nw < 0 || kbot-ktop+1+1 < nw:
    90  		panic(badNw)
    91  	case ldh < max(1, n):
    92  		panic(badLdH)
    93  	case wantz && (iloz < 0 || ktop < iloz):
    94  		panic(badIloz)
    95  	case wantz && (ihiz < kbot || n <= ihiz):
    96  		panic(badIhiz)
    97  	case ldz < 1, wantz && ldz < n:
    98  		panic(badLdZ)
    99  	case ldv < max(1, nw):
   100  		panic(badLdV)
   101  	case nh < nw:
   102  		panic(badNh)
   103  	case ldt < max(1, nh):
   104  		panic(badLdT)
   105  	case nv < 0:
   106  		panic(nvLT0)
   107  	case ldwv < max(1, nw):
   108  		panic(badLdWV)
   109  	case lwork < max(1, 2*nw) && lwork != -1:
   110  		panic(badLWork)
   111  	case len(work) < max(1, lwork):
   112  		panic(shortWork)
   113  	case recur < 0:
   114  		panic(recurLT0)
   115  	}
   116  
   117  	// Quick return for zero window size.
   118  	if nw == 0 {
   119  		work[0] = 1
   120  		return 0, 0
   121  	}
   122  
   123  	// LAPACK code does not enforce the documented behavior
   124  	//  nw <= kbot-ktop+1
   125  	// but we do (we panic above).
   126  	jw := nw
   127  	lwkopt := max(1, 2*nw)
   128  	if jw > 2 {
   129  		// Workspace query call to Dgehrd.
   130  		impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1)
   131  		lwk1 := int(work[0])
   132  		// Workspace query call to Dormhr.
   133  		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
   134  		lwk2 := int(work[0])
   135  		if recur > 0 {
   136  			// Workspace query call to Dlaqr04.
   137  			impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1)
   138  			lwk3 := int(work[0])
   139  			// Optimal workspace.
   140  			lwkopt = max(jw+max(lwk1, lwk2), lwk3)
   141  		} else {
   142  			// Optimal workspace.
   143  			lwkopt = jw + max(lwk1, lwk2)
   144  		}
   145  	}
   146  	// Quick return in case of workspace query.
   147  	if lwork == -1 {
   148  		work[0] = float64(lwkopt)
   149  		return 0, 0
   150  	}
   151  
   152  	// Check input slices only if not doing workspace query.
   153  	switch {
   154  	case len(h) < (n-1)*ldh+n:
   155  		panic(shortH)
   156  	case len(v) < (nw-1)*ldv+nw:
   157  		panic(shortV)
   158  	case len(t) < (nw-1)*ldt+nh:
   159  		panic(shortT)
   160  	case len(wv) < (nv-1)*ldwv+nw:
   161  		panic(shortWV)
   162  	case wantz && len(z) < (n-1)*ldz+n:
   163  		panic(shortZ)
   164  	case len(sr) != kbot+1:
   165  		panic(badLenSr)
   166  	case len(si) != kbot+1:
   167  		panic(badLenSi)
   168  	case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
   169  		panic(notIsolated)
   170  	case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
   171  		panic(notIsolated)
   172  	}
   173  
   174  	// Machine constants.
   175  	ulp := dlamchP
   176  	smlnum := float64(n) / ulp * dlamchS
   177  
   178  	// Setup deflation window.
   179  	var s float64
   180  	kwtop := kbot - jw + 1
   181  	if kwtop != ktop {
   182  		s = h[kwtop*ldh+kwtop-1]
   183  	}
   184  	if kwtop == kbot {
   185  		// 1×1 deflation window.
   186  		sr[kwtop] = h[kwtop*ldh+kwtop]
   187  		si[kwtop] = 0
   188  		ns = 1
   189  		nd = 0
   190  		if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
   191  			ns = 0
   192  			nd = 1
   193  			if kwtop > ktop {
   194  				h[kwtop*ldh+kwtop-1] = 0
   195  			}
   196  		}
   197  		work[0] = 1
   198  		return ns, nd
   199  	}
   200  
   201  	// Convert to spike-triangular form. In case of a rare QR failure, this
   202  	// routine continues to do aggressive early deflation using that part of
   203  	// the deflation window that converged using infqr here and there to
   204  	// keep track.
   205  	impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
   206  	bi := blas64.Implementation()
   207  	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
   208  	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
   209  	nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
   210  	var infqr int
   211  	if recur > 0 && jw > nmin {
   212  		infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
   213  	} else {
   214  		infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
   215  	}
   216  	// Note that ilo == 0 which conveniently coincides with the success
   217  	// value of infqr, that is, infqr as an index always points to the first
   218  	// converged eigenvalue.
   219  
   220  	// Dtrexc needs a clean margin near the diagonal.
   221  	for j := 0; j < jw-3; j++ {
   222  		t[(j+2)*ldt+j] = 0
   223  		t[(j+3)*ldt+j] = 0
   224  	}
   225  	if jw >= 3 {
   226  		t[(jw-1)*ldt+jw-3] = 0
   227  	}
   228  
   229  	ns = jw
   230  	ilst := infqr
   231  	// Deflation detection loop.
   232  	for ilst < ns {
   233  		bulge := false
   234  		if ns >= 2 {
   235  			bulge = t[(ns-1)*ldt+ns-2] != 0
   236  		}
   237  		if !bulge {
   238  			// Real eigenvalue.
   239  			abst := math.Abs(t[(ns-1)*ldt+ns-1])
   240  			if abst == 0 {
   241  				abst = math.Abs(s)
   242  			}
   243  			if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
   244  				// Deflatable.
   245  				ns--
   246  			} else {
   247  				// Undeflatable, move it up out of the way.
   248  				// Dtrexc can not fail in this case.
   249  				_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
   250  				ilst++
   251  			}
   252  			continue
   253  		}
   254  		// Complex conjugate pair.
   255  		abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
   256  		if abst == 0 {
   257  			abst = math.Abs(s)
   258  		}
   259  		if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
   260  			// Deflatable.
   261  			ns -= 2
   262  		} else {
   263  			// Undeflatable, move them up out of the way.
   264  			// Dtrexc does the right thing with ilst in case of a
   265  			// rare exchange failure.
   266  			_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
   267  			ilst += 2
   268  		}
   269  	}
   270  
   271  	// Return to Hessenberg form.
   272  	if ns == 0 {
   273  		s = 0
   274  	}
   275  	if ns < jw {
   276  		// Sorting diagonal blocks of T improves accuracy for graded
   277  		// matrices. Bubble sort deals well with exchange failures.
   278  		sorted := false
   279  		i := ns
   280  		for !sorted {
   281  			sorted = true
   282  			kend := i - 1
   283  			i = infqr
   284  			var k int
   285  			if i == ns-1 || t[(i+1)*ldt+i] == 0 {
   286  				k = i + 1
   287  			} else {
   288  				k = i + 2
   289  			}
   290  			for k <= kend {
   291  				var evi float64
   292  				if k == i+1 {
   293  					evi = math.Abs(t[i*ldt+i])
   294  				} else {
   295  					evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
   296  				}
   297  
   298  				var evk float64
   299  				if k == kend || t[(k+1)*ldt+k] == 0 {
   300  					evk = math.Abs(t[k*ldt+k])
   301  				} else {
   302  					evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
   303  				}
   304  
   305  				if evi >= evk {
   306  					i = k
   307  				} else {
   308  					sorted = false
   309  					_, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
   310  					if ok {
   311  						i = ilst
   312  					} else {
   313  						i = k
   314  					}
   315  				}
   316  				if i == kend || t[(i+1)*ldt+i] == 0 {
   317  					k = i + 1
   318  				} else {
   319  					k = i + 2
   320  				}
   321  			}
   322  		}
   323  	}
   324  
   325  	// Restore shift/eigenvalue array from T.
   326  	for i := jw - 1; i >= infqr; {
   327  		if i == infqr || t[i*ldt+i-1] == 0 {
   328  			sr[kwtop+i] = t[i*ldt+i]
   329  			si[kwtop+i] = 0
   330  			i--
   331  			continue
   332  		}
   333  		aa := t[(i-1)*ldt+i-1]
   334  		bb := t[(i-1)*ldt+i]
   335  		cc := t[i*ldt+i-1]
   336  		dd := t[i*ldt+i]
   337  		_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
   338  		i -= 2
   339  	}
   340  
   341  	if ns < jw || s == 0 {
   342  		if ns > 1 && s != 0 {
   343  			// Reflect spike back into lower triangle.
   344  			bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
   345  			_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
   346  			work[0] = 1
   347  			impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
   348  			impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
   349  			impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
   350  			impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
   351  			impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
   352  		}
   353  
   354  		// Copy updated reduced window into place.
   355  		if kwtop > 0 {
   356  			h[kwtop*ldh+kwtop-1] = s * v[0]
   357  		}
   358  		impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
   359  		bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)
   360  
   361  		// Accumulate orthogonal matrix in order to update H and Z, if
   362  		// requested.
   363  		if ns > 1 && s != 0 {
   364  			// work[:ns-1] contains the elementary reflectors stored
   365  			// by a call to Dgehrd above.
   366  			impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
   367  				t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
   368  		}
   369  
   370  		// Update vertical slab in H.
   371  		var ltop int
   372  		if !wantt {
   373  			ltop = ktop
   374  		}
   375  		for krow := ltop; krow < kwtop; krow += nv {
   376  			kln := min(nv, kwtop-krow)
   377  			bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
   378  				1, h[krow*ldh+kwtop:], ldh, v, ldv,
   379  				0, wv, ldwv)
   380  			impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
   381  		}
   382  
   383  		// Update horizontal slab in H.
   384  		if wantt {
   385  			for kcol := kbot + 1; kcol < n; kcol += nh {
   386  				kln := min(nh, n-kcol)
   387  				bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
   388  					1, v, ldv, h[kwtop*ldh+kcol:], ldh,
   389  					0, t, ldt)
   390  				impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
   391  			}
   392  		}
   393  
   394  		// Update vertical slab in Z.
   395  		if wantz {
   396  			for krow := iloz; krow <= ihiz; krow += nv {
   397  				kln := min(nv, ihiz-krow+1)
   398  				bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
   399  					1, z[krow*ldz+kwtop:], ldz, v, ldv,
   400  					0, wv, ldwv)
   401  				impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
   402  			}
   403  		}
   404  	}
   405  
   406  	// The number of deflations.
   407  	nd = jw - ns
   408  	// Shifts are converged eigenvalues that could not be deflated.
   409  	// Subtracting infqr from the spike length takes care of the case of a
   410  	// rare QR failure while calculating eigenvalues of the deflation
   411  	// window.
   412  	ns -= infqr
   413  	work[0] = float64(lwkopt)
   414  	return ns, nd
   415  }