gonum.org/v1/gonum@v0.14.0/lapack/gonum/dlaqr23.go

gonum.org/v1/gonum@v0.14.0/lapack/gonum/dlaqr23.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"math"
     9  
    10  	"gonum.org/v1/gonum/blas"
    11  	"gonum.org/v1/gonum/blas/blas64"
    12  	"gonum.org/v1/gonum/lapack"
    13  )
    14  
    15  // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
    16  // Hessenberg matrix to detect and deflate fully converged eigenvalues from a
    17  // trailing principal submatrix using aggressive early deflation [1].
    18  //
    19  // On return, H will be overwritten by a new Hessenberg matrix that is a
    20  // perturbation of an orthogonal similarity transformation of H. It is hoped
    21  // that on output H will have many zero subdiagonal entries.
    22  //
    23  // If wantt is true, the matrix H will be fully updated so that the
    24  // quasi-triangular Schur factor can be computed. If wantt is false, then only
    25  // enough of H will be updated to preserve the eigenvalues.
    26  //
    27  // If wantz is true, the orthogonal similarity transformation will be
    28  // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
    29  //
    30  // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
    31  // of H. It must hold that
    32  //
    33  //	0 <= ilo <= ihi < n     if n > 0,
    34  //	ilo == 0 and ihi == -1  if n == 0,
    35  //
    36  // and the block must be isolated, that is, it must hold that
    37  //
    38  //	ktop == 0   or H[ktop,ktop-1] == 0,
    39  //	kbot == n-1 or H[kbot+1,kbot] == 0,
    40  //
    41  // otherwise Dlaqr23 will panic.
    42  //
    43  // nw is the deflation window size. It must hold that
    44  //
    45  //	0 <= nw <= kbot-ktop+1,
    46  //
    47  // otherwise Dlaqr23 will panic.
    48  //
    49  // iloz and ihiz specify the rows of the n×n matrix Z to which transformations
    50  // will be applied if wantz is true. It must hold that
    51  //
    52  //	0 <= iloz <= ktop,  and  kbot <= ihiz < n,
    53  //
    54  // otherwise Dlaqr23 will panic.
    55  //
    56  // sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
    57  //
    58  // v and ldv represent an nw×nw work matrix.
    59  // t and ldt represent an nw×nh work matrix, and nh must be at least nw.
    60  // wv and ldwv represent an nv×nw work matrix.
    61  //
    62  // work must have length at least lwork and lwork must be at least max(1,2*nw),
    63  // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
    64  // efficiency. On return, work[0] will contain the optimal value of lwork.
    65  //
    66  // If lwork is -1, instead of performing Dlaqr23, the function only estimates the
    67  // optimal workspace size and stores it into work[0]. Neither h nor z are
    68  // accessed.
    69  //
    70  // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
    71  // as DLAQR3, for recur == 0 it behaves as DLAQR2.
    72  //
    73  // On return, ns and nd will contain respectively the number of unconverged
    74  // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
    75  // sr and si.
    76  //
    77  // On return, the real and imaginary parts of approximate eigenvalues that may
    78  // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
    79  // and si[kbot-nd-ns+1:kbot-nd+1].
    80  //
    81  // On return, the real and imaginary parts of converged eigenvalues will be
    82  // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
    83  //
    84  // References:
    85  //
    86  //	[1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
    87  //	    Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
    88  //	    URL: http://dx.doi.org/10.1137/S0895479801384585
    89  func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
    90  	switch {
    91  	case n < 0:
    92  		panic(nLT0)
    93  	case ktop < 0 || max(0, n-1) < ktop:
    94  		panic(badKtop)
    95  	case kbot < min(ktop, n-1) || n <= kbot:
    96  		panic(badKbot)
    97  	case nw < 0 || kbot-ktop+1+1 < nw:
    98  		panic(badNw)
    99  	case ldh < max(1, n):
   100  		panic(badLdH)
   101  	case wantz && (iloz < 0 || ktop < iloz):
   102  		panic(badIloz)
   103  	case wantz && (ihiz < kbot || n <= ihiz):
   104  		panic(badIhiz)
   105  	case ldz < 1, wantz && ldz < n:
   106  		panic(badLdZ)
   107  	case ldv < max(1, nw):
   108  		panic(badLdV)
   109  	case nh < nw:
   110  		panic(badNh)
   111  	case ldt < max(1, nh):
   112  		panic(badLdT)
   113  	case nv < 0:
   114  		panic(nvLT0)
   115  	case ldwv < max(1, nw):
   116  		panic(badLdWV)
   117  	case lwork < max(1, 2*nw) && lwork != -1:
   118  		panic(badLWork)
   119  	case len(work) < max(1, lwork):
   120  		panic(shortWork)
   121  	case recur < 0:
   122  		panic(recurLT0)
   123  	}
   124  
   125  	// Quick return for zero window size.
   126  	if nw == 0 {
   127  		work[0] = 1
   128  		return 0, 0
   129  	}
   130  
   131  	// LAPACK code does not enforce the documented behavior
   132  	//  nw <= kbot-ktop+1
   133  	// but we do (we panic above).
   134  	jw := nw
   135  	lwkopt := max(1, 2*nw)
   136  	if jw > 2 {
   137  		// Workspace query call to Dgehrd.
   138  		impl.Dgehrd(jw, 0, jw-2, t, ldt, work, work, -1)
   139  		lwk1 := int(work[0])
   140  		// Workspace query call to Dormhr.
   141  		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, t, ldt, work, v, ldv, work, -1)
   142  		lwk2 := int(work[0])
   143  		if recur > 0 {
   144  			// Workspace query call to Dlaqr04.
   145  			impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr, si, 0, jw-1, v, ldv, work, -1, recur-1)
   146  			lwk3 := int(work[0])
   147  			// Optimal workspace.
   148  			lwkopt = max(jw+max(lwk1, lwk2), lwk3)
   149  		} else {
   150  			// Optimal workspace.
   151  			lwkopt = jw + max(lwk1, lwk2)
   152  		}
   153  	}
   154  	// Quick return in case of workspace query.
   155  	if lwork == -1 {
   156  		work[0] = float64(lwkopt)
   157  		return 0, 0
   158  	}
   159  
   160  	// Check input slices only if not doing workspace query.
   161  	switch {
   162  	case len(h) < (n-1)*ldh+n:
   163  		panic(shortH)
   164  	case len(v) < (nw-1)*ldv+nw:
   165  		panic(shortV)
   166  	case len(t) < (nw-1)*ldt+nh:
   167  		panic(shortT)
   168  	case len(wv) < (nv-1)*ldwv+nw:
   169  		panic(shortWV)
   170  	case wantz && len(z) < (n-1)*ldz+n:
   171  		panic(shortZ)
   172  	case len(sr) != kbot+1:
   173  		panic(badLenSr)
   174  	case len(si) != kbot+1:
   175  		panic(badLenSi)
   176  	case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
   177  		panic(notIsolated)
   178  	case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
   179  		panic(notIsolated)
   180  	}
   181  
   182  	// Machine constants.
   183  	ulp := dlamchP
   184  	smlnum := float64(n) / ulp * dlamchS
   185  
   186  	// Setup deflation window.
   187  	var s float64
   188  	kwtop := kbot - jw + 1
   189  	if kwtop != ktop {
   190  		s = h[kwtop*ldh+kwtop-1]
   191  	}
   192  	if kwtop == kbot {
   193  		// 1×1 deflation window.
   194  		sr[kwtop] = h[kwtop*ldh+kwtop]
   195  		si[kwtop] = 0
   196  		ns = 1
   197  		nd = 0
   198  		if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
   199  			ns = 0
   200  			nd = 1
   201  			if kwtop > ktop {
   202  				h[kwtop*ldh+kwtop-1] = 0
   203  			}
   204  		}
   205  		work[0] = 1
   206  		return ns, nd
   207  	}
   208  
   209  	// Convert to spike-triangular form. In case of a rare QR failure, this
   210  	// routine continues to do aggressive early deflation using that part of
   211  	// the deflation window that converged using infqr here and there to
   212  	// keep track.
   213  	impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
   214  	bi := blas64.Implementation()
   215  	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
   216  	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
   217  	nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
   218  	var infqr int
   219  	if recur > 0 && jw > nmin {
   220  		infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
   221  	} else {
   222  		infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
   223  	}
   224  	// Note that ilo == 0 which conveniently coincides with the success
   225  	// value of infqr, that is, infqr as an index always points to the first
   226  	// converged eigenvalue.
   227  
   228  	// Dtrexc needs a clean margin near the diagonal.
   229  	for j := 0; j < jw-3; j++ {
   230  		t[(j+2)*ldt+j] = 0
   231  		t[(j+3)*ldt+j] = 0
   232  	}
   233  	if jw >= 3 {
   234  		t[(jw-1)*ldt+jw-3] = 0
   235  	}
   236  
   237  	ns = jw
   238  	ilst := infqr
   239  	// Deflation detection loop.
   240  	for ilst < ns {
   241  		bulge := false
   242  		if ns >= 2 {
   243  			bulge = t[(ns-1)*ldt+ns-2] != 0
   244  		}
   245  		if !bulge {
   246  			// Real eigenvalue.
   247  			abst := math.Abs(t[(ns-1)*ldt+ns-1])
   248  			if abst == 0 {
   249  				abst = math.Abs(s)
   250  			}
   251  			if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
   252  				// Deflatable.
   253  				ns--
   254  			} else {
   255  				// Undeflatable, move it up out of the way.
   256  				// Dtrexc can not fail in this case.
   257  				_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
   258  				ilst++
   259  			}
   260  			continue
   261  		}
   262  		// Complex conjugate pair.
   263  		abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
   264  		if abst == 0 {
   265  			abst = math.Abs(s)
   266  		}
   267  		if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
   268  			// Deflatable.
   269  			ns -= 2
   270  		} else {
   271  			// Undeflatable, move them up out of the way.
   272  			// Dtrexc does the right thing with ilst in case of a
   273  			// rare exchange failure.
   274  			_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
   275  			ilst += 2
   276  		}
   277  	}
   278  
   279  	// Return to Hessenberg form.
   280  	if ns == 0 {
   281  		s = 0
   282  	}
   283  	if ns < jw {
   284  		// Sorting diagonal blocks of T improves accuracy for graded
   285  		// matrices. Bubble sort deals well with exchange failures.
   286  		sorted := false
   287  		i := ns
   288  		for !sorted {
   289  			sorted = true
   290  			kend := i - 1
   291  			i = infqr
   292  			var k int
   293  			if i == ns-1 || t[(i+1)*ldt+i] == 0 {
   294  				k = i + 1
   295  			} else {
   296  				k = i + 2
   297  			}
   298  			for k <= kend {
   299  				var evi float64
   300  				if k == i+1 {
   301  					evi = math.Abs(t[i*ldt+i])
   302  				} else {
   303  					evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
   304  				}
   305  
   306  				var evk float64
   307  				if k == kend || t[(k+1)*ldt+k] == 0 {
   308  					evk = math.Abs(t[k*ldt+k])
   309  				} else {
   310  					evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
   311  				}
   312  
   313  				if evi >= evk {
   314  					i = k
   315  				} else {
   316  					sorted = false
   317  					_, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
   318  					if ok {
   319  						i = ilst
   320  					} else {
   321  						i = k
   322  					}
   323  				}
   324  				if i == kend || t[(i+1)*ldt+i] == 0 {
   325  					k = i + 1
   326  				} else {
   327  					k = i + 2
   328  				}
   329  			}
   330  		}
   331  	}
   332  
   333  	// Restore shift/eigenvalue array from T.
   334  	for i := jw - 1; i >= infqr; {
   335  		if i == infqr || t[i*ldt+i-1] == 0 {
   336  			sr[kwtop+i] = t[i*ldt+i]
   337  			si[kwtop+i] = 0
   338  			i--
   339  			continue
   340  		}
   341  		aa := t[(i-1)*ldt+i-1]
   342  		bb := t[(i-1)*ldt+i]
   343  		cc := t[i*ldt+i-1]
   344  		dd := t[i*ldt+i]
   345  		_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
   346  		i -= 2
   347  	}
   348  
   349  	if ns < jw || s == 0 {
   350  		if ns > 1 && s != 0 {
   351  			// Reflect spike back into lower triangle.
   352  			bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
   353  			_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
   354  			work[0] = 1
   355  			impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
   356  			impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
   357  			impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
   358  			impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
   359  			impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
   360  		}
   361  
   362  		// Copy updated reduced window into place.
   363  		if kwtop > 0 {
   364  			h[kwtop*ldh+kwtop-1] = s * v[0]
   365  		}
   366  		impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
   367  		bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)
   368  
   369  		// Accumulate orthogonal matrix in order to update H and Z, if
   370  		// requested.
   371  		if ns > 1 && s != 0 {
   372  			// work[:ns-1] contains the elementary reflectors stored
   373  			// by a call to Dgehrd above.
   374  			impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
   375  				t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
   376  		}
   377  
   378  		// Update vertical slab in H.
   379  		var ltop int
   380  		if !wantt {
   381  			ltop = ktop
   382  		}
   383  		for krow := ltop; krow < kwtop; krow += nv {
   384  			kln := min(nv, kwtop-krow)
   385  			bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
   386  				1, h[krow*ldh+kwtop:], ldh, v, ldv,
   387  				0, wv, ldwv)
   388  			impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
   389  		}
   390  
   391  		// Update horizontal slab in H.
   392  		if wantt {
   393  			for kcol := kbot + 1; kcol < n; kcol += nh {
   394  				kln := min(nh, n-kcol)
   395  				bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
   396  					1, v, ldv, h[kwtop*ldh+kcol:], ldh,
   397  					0, t, ldt)
   398  				impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
   399  			}
   400  		}
   401  
   402  		// Update vertical slab in Z.
   403  		if wantz {
   404  			for krow := iloz; krow <= ihiz; krow += nv {
   405  				kln := min(nv, ihiz-krow+1)
   406  				bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
   407  					1, z[krow*ldz+kwtop:], ldz, v, ldv,
   408  					0, wv, ldwv)
   409  				impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
   410  			}
   411  		}
   412  	}
   413  
   414  	// The number of deflations.
   415  	nd = jw - ns
   416  	// Shifts are converged eigenvalues that could not be deflated.
   417  	// Subtracting infqr from the spike length takes care of the case of a
   418  	// rare QR failure while calculating eigenvalues of the deflation
   419  	// window.
   420  	ns -= infqr
   421  	work[0] = float64(lwkopt)
   422  	return ns, nd
   423  }