github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dlaqr23.go (about)

     1  // Copyright ©2016 The gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package native
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/gonum/blas"
    11  	"github.com/gonum/blas/blas64"
    12  	"github.com/gonum/lapack"
    13  )
    14  
    15  // Dlaqr23 performs the orthogonal similarity transformation of an n×n upper
    16  // Hessenberg matrix to detect and deflate fully converged eigenvalues from a
    17  // trailing principal submatrix using aggressive early deflation [1].
    18  //
    19  // On return, H will be overwritten by a new Hessenberg matrix that is a
    20  // perturbation of an orthogonal similarity transformation of H. It is hoped
    21  // that on output H will have many zero subdiagonal entries.
    22  //
    23  // If wantt is true, the matrix H will be fully updated so that the
    24  // quasi-triangular Schur factor can be computed. If wantt is false, then only
    25  // enough of H will be updated to preserve the eigenvalues.
    26  //
    27  // If wantz is true, the orthogonal similarity transformation will be
    28  // accumulated into Z[iloz:ihiz+1,ktop:kbot+1], otherwise Z is not referenced.
    29  //
    30  // ktop and kbot determine a block [ktop:kbot+1,ktop:kbot+1] along the diagonal
    31  // of H. It must hold that
    32  //  0 <= ilo <= ihi < n,     if n > 0,
    33  //  ilo == 0 and ihi == -1,  if n == 0,
    34  // and the block must be isolated, that is, it must hold that
    35  //  ktop == 0   or H[ktop,ktop-1] == 0,
    36  //  kbot == n-1 or H[kbot+1,kbot] == 0,
    37  // otherwise Dlaqr23 will panic.
    38  //
    39  // nw is the deflation window size. It must hold that
    40  //  0 <= nw <= kbot-ktop+1,
    41  // otherwise Dlaqr23 will panic.
    42  //
    43  // iloz and ihiz specify the rows of the n×n matrix Z to which transformations
    44  // will be applied if wantz is true. It must hold that
    45  //  0 <= iloz <= ktop,  and  kbot <= ihiz < n,
    46  // otherwise Dlaqr23 will panic.
    47  //
    48  // sr and si must have length kbot+1, otherwise Dlaqr23 will panic.
    49  //
    50  // v and ldv represent an nw×nw work matrix.
    51  // t and ldt represent an nw×nh work matrix, and nh must be at least nw.
    52  // wv and ldwv represent an nv×nw work matrix.
    53  //
    54  // work must have length at least lwork and lwork must be at least max(1,2*nw),
    55  // otherwise Dlaqr23 will panic. Larger values of lwork may result in greater
    56  // efficiency. On return, work[0] will contain the optimal value of lwork.
    57  //
    58  // If lwork is -1, instead of performing Dlaqr23, the function only estimates the
    59  // optimal workspace size and stores it into work[0]. Neither h nor z are
    60  // accessed.
    61  //
    62  // recur is the non-negative recursion depth. For recur > 0, Dlaqr23 behaves
    63  // as DLAQR3, for recur == 0 it behaves as DLAQR2.
    64  //
    65  // On return, ns and nd will contain respectively the number of unconverged
    66  // (i.e., approximate) eigenvalues and converged eigenvalues that are stored in
    67  // sr and si.
    68  //
    69  // On return, the real and imaginary parts of approximate eigenvalues that may
    70  // be used for shifts will be stored respectively in sr[kbot-nd-ns+1:kbot-nd+1]
    71  // and si[kbot-nd-ns+1:kbot-nd+1].
    72  //
    73  // On return, the real and imaginary parts of converged eigenvalues will be
    74  // stored respectively in sr[kbot-nd+1:kbot+1] and si[kbot-nd+1:kbot+1].
    75  //
    76  // References:
    77  //  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
    78  //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl 23(4) (2002), pp. 948—973
    79  //      URL: http://dx.doi.org/10.1137/S0895479801384585
    80  //
    81  func (impl Implementation) Dlaqr23(wantt, wantz bool, n, ktop, kbot, nw int, h []float64, ldh int, iloz, ihiz int, z []float64, ldz int, sr, si []float64, v []float64, ldv int, nh int, t []float64, ldt int, nv int, wv []float64, ldwv int, work []float64, lwork int, recur int) (ns, nd int) {
    82  	switch {
    83  	case ktop < 0 || max(0, n-1) < ktop:
    84  		panic("lapack: invalid value of ktop")
    85  	case kbot < min(ktop, n-1) || n <= kbot:
    86  		panic("lapack: invalid value of kbot")
    87  	case (nw < 0 || kbot-ktop+1 < nw) && lwork != -1:
    88  		panic("lapack: invalid value of nw")
    89  	case nh < nw:
    90  		panic("lapack: invalid value of nh")
    91  	case lwork < max(1, 2*nw) && lwork != -1:
    92  		panic(badWork)
    93  	case len(work) < lwork:
    94  		panic(shortWork)
    95  	case recur < 0:
    96  		panic("lapack: recur is negative")
    97  	}
    98  	if wantz {
    99  		switch {
   100  		case iloz < 0 || ktop < iloz:
   101  			panic("lapack: invalid value of iloz")
   102  		case ihiz < kbot || n <= ihiz:
   103  			panic("lapack: invalid value of ihiz")
   104  		}
   105  	}
   106  	if lwork != -1 {
   107  		// Check input slices only if not doing workspace query.
   108  		checkMatrix(n, n, h, ldh)
   109  		checkMatrix(nw, nw, v, ldv)
   110  		checkMatrix(nw, nh, t, ldt)
   111  		checkMatrix(nv, nw, wv, ldwv)
   112  		if wantz {
   113  			checkMatrix(n, n, z, ldz)
   114  		}
   115  		switch {
   116  		case ktop > 0 && h[ktop*ldh+ktop-1] != 0:
   117  			panic("lapack: block not isolated")
   118  		case kbot+1 < n && h[(kbot+1)*ldh+kbot] != 0:
   119  			panic("lapack: block not isolated")
   120  		case len(sr) != kbot+1:
   121  			panic("lapack: bad length of sr")
   122  		case len(si) != kbot+1:
   123  			panic("lapack: bad length of si")
   124  		}
   125  	}
   126  
   127  	// Quick return for zero window size.
   128  	if nw == 0 {
   129  		work[0] = 1
   130  		return 0, 0
   131  	}
   132  
   133  	// LAPACK code does not enforce the documented behavior
   134  	//  nw <= kbot-ktop+1
   135  	// but we do (we panic above).
   136  	jw := nw
   137  	lwkopt := max(1, 2*nw)
   138  	if jw > 2 {
   139  		// Workspace query call to Dgehrd.
   140  		impl.Dgehrd(jw, 0, jw-2, nil, 0, nil, work, -1)
   141  		lwk1 := int(work[0])
   142  		// Workspace query call to Dormhr.
   143  		impl.Dormhr(blas.Right, blas.NoTrans, jw, jw, 0, jw-2, nil, 0, nil, nil, 0, work, -1)
   144  		lwk2 := int(work[0])
   145  		if recur > 0 {
   146  			// Workspace query call to Dlaqr04.
   147  			impl.Dlaqr04(true, true, jw, 0, jw-1, nil, 0, nil, nil, 0, jw-1, nil, 0, work, -1, recur-1)
   148  			lwk3 := int(work[0])
   149  			// Optimal workspace.
   150  			lwkopt = max(jw+max(lwk1, lwk2), lwk3)
   151  		} else {
   152  			// Optimal workspace.
   153  			lwkopt = jw + max(lwk1, lwk2)
   154  		}
   155  	}
   156  	// Quick return in case of workspace query.
   157  	if lwork == -1 {
   158  		work[0] = float64(lwkopt)
   159  		return 0, 0
   160  	}
   161  
   162  	// Machine constants.
   163  	ulp := dlamchP
   164  	smlnum := float64(n) / ulp * dlamchS
   165  
   166  	// Setup deflation window.
   167  	var s float64
   168  	kwtop := kbot - jw + 1
   169  	if kwtop != ktop {
   170  		s = h[kwtop*ldh+kwtop-1]
   171  	}
   172  	if kwtop == kbot {
   173  		// 1×1 deflation window.
   174  		sr[kwtop] = h[kwtop*ldh+kwtop]
   175  		si[kwtop] = 0
   176  		ns = 1
   177  		nd = 0
   178  		if math.Abs(s) <= math.Max(smlnum, ulp*math.Abs(h[kwtop*ldh+kwtop])) {
   179  			ns = 0
   180  			nd = 1
   181  			if kwtop > ktop {
   182  				h[kwtop*ldh+kwtop-1] = 0
   183  			}
   184  		}
   185  		work[0] = 1
   186  		return ns, nd
   187  	}
   188  
   189  	// Convert to spike-triangular form. In case of a rare QR failure, this
   190  	// routine continues to do aggressive early deflation using that part of
   191  	// the deflation window that converged using infqr here and there to
   192  	// keep track.
   193  	impl.Dlacpy(blas.Upper, jw, jw, h[kwtop*ldh+kwtop:], ldh, t, ldt)
   194  	bi := blas64.Implementation()
   195  	bi.Dcopy(jw-1, h[(kwtop+1)*ldh+kwtop:], ldh+1, t[ldt:], ldt+1)
   196  	impl.Dlaset(blas.All, jw, jw, 0, 1, v, ldv)
   197  	nmin := impl.Ilaenv(12, "DLAQR3", "SV", jw, 0, jw-1, lwork)
   198  	var infqr int
   199  	if recur > 0 && jw > nmin {
   200  		infqr = impl.Dlaqr04(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv, work, lwork, recur-1)
   201  	} else {
   202  		infqr = impl.Dlahqr(true, true, jw, 0, jw-1, t, ldt, sr[kwtop:], si[kwtop:], 0, jw-1, v, ldv)
   203  	}
   204  	// Note that ilo == 0 which conveniently coincides with the success
   205  	// value of infqr, that is, infqr as an index always points to the first
   206  	// converged eigenvalue.
   207  
   208  	// Dtrexc needs a clean margin near the diagonal.
   209  	for j := 0; j < jw-3; j++ {
   210  		t[(j+2)*ldt+j] = 0
   211  		t[(j+3)*ldt+j] = 0
   212  	}
   213  	if jw >= 3 {
   214  		t[(jw-1)*ldt+jw-3] = 0
   215  	}
   216  
   217  	ns = jw
   218  	ilst := infqr
   219  	// Deflation detection loop.
   220  	for ilst < ns {
   221  		bulge := false
   222  		if ns >= 2 {
   223  			bulge = t[(ns-1)*ldt+ns-2] != 0
   224  		}
   225  		if !bulge {
   226  			// Real eigenvalue.
   227  			abst := math.Abs(t[(ns-1)*ldt+ns-1])
   228  			if abst == 0 {
   229  				abst = math.Abs(s)
   230  			}
   231  			if math.Abs(s*v[ns-1]) <= math.Max(smlnum, ulp*abst) {
   232  				// Deflatable.
   233  				ns--
   234  			} else {
   235  				// Undeflatable, move it up out of the way.
   236  				// Dtrexc can not fail in this case.
   237  				_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
   238  				ilst++
   239  			}
   240  			continue
   241  		}
   242  		// Complex conjugate pair.
   243  		abst := math.Abs(t[(ns-1)*ldt+ns-1]) + math.Sqrt(math.Abs(t[(ns-1)*ldt+ns-2]))*math.Sqrt(math.Abs(t[(ns-2)*ldt+ns-1]))
   244  		if abst == 0 {
   245  			abst = math.Abs(s)
   246  		}
   247  		if math.Max(math.Abs(s*v[ns-1]), math.Abs(s*v[ns-2])) <= math.Max(smlnum, ulp*abst) {
   248  			// Deflatable.
   249  			ns -= 2
   250  		} else {
   251  			// Undeflatable, move them up out of the way.
   252  			// Dtrexc does the right thing with ilst in case of a
   253  			// rare exchange failure.
   254  			_, ilst, _ = impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, ns-1, ilst, work)
   255  			ilst += 2
   256  		}
   257  	}
   258  
   259  	// Return to Hessenberg form.
   260  	if ns == 0 {
   261  		s = 0
   262  	}
   263  	if ns < jw {
   264  		// Sorting diagonal blocks of T improves accuracy for graded
   265  		// matrices. Bubble sort deals well with exchange failures.
   266  		sorted := false
   267  		i := ns
   268  		for !sorted {
   269  			sorted = true
   270  			kend := i - 1
   271  			i = infqr
   272  			var k int
   273  			if i == ns-1 || t[(i+1)*ldt+i] == 0 {
   274  				k = i + 1
   275  			} else {
   276  				k = i + 2
   277  			}
   278  			for k <= kend {
   279  				var evi float64
   280  				if k == i+1 {
   281  					evi = math.Abs(t[i*ldt+i])
   282  				} else {
   283  					evi = math.Abs(t[i*ldt+i]) + math.Sqrt(math.Abs(t[(i+1)*ldt+i]))*math.Sqrt(math.Abs(t[i*ldt+i+1]))
   284  				}
   285  
   286  				var evk float64
   287  				if k == kend || t[(k+1)*ldt+k] == 0 {
   288  					evk = math.Abs(t[k*ldt+k])
   289  				} else {
   290  					evk = math.Abs(t[k*ldt+k]) + math.Sqrt(math.Abs(t[(k+1)*ldt+k]))*math.Sqrt(math.Abs(t[k*ldt+k+1]))
   291  				}
   292  
   293  				if evi >= evk {
   294  					i = k
   295  				} else {
   296  					sorted = false
   297  					_, ilst, ok := impl.Dtrexc(lapack.UpdateSchur, jw, t, ldt, v, ldv, i, k, work)
   298  					if ok {
   299  						i = ilst
   300  					} else {
   301  						i = k
   302  					}
   303  				}
   304  				if i == kend || t[(i+1)*ldt+i] == 0 {
   305  					k = i + 1
   306  				} else {
   307  					k = i + 2
   308  				}
   309  			}
   310  		}
   311  	}
   312  
   313  	// Restore shift/eigenvalue array from T.
   314  	for i := jw - 1; i >= infqr; {
   315  		if i == infqr || t[i*ldt+i-1] == 0 {
   316  			sr[kwtop+i] = t[i*ldt+i]
   317  			si[kwtop+i] = 0
   318  			i--
   319  			continue
   320  		}
   321  		aa := t[(i-1)*ldt+i-1]
   322  		bb := t[(i-1)*ldt+i]
   323  		cc := t[i*ldt+i-1]
   324  		dd := t[i*ldt+i]
   325  		_, _, _, _, sr[kwtop+i-1], si[kwtop+i-1], sr[kwtop+i], si[kwtop+i], _, _ = impl.Dlanv2(aa, bb, cc, dd)
   326  		i -= 2
   327  	}
   328  
   329  	if ns < jw || s == 0 {
   330  		if ns > 1 && s != 0 {
   331  			// Reflect spike back into lower triangle.
   332  			bi.Dcopy(ns, v[:ns], 1, work[:ns], 1)
   333  			_, tau := impl.Dlarfg(ns, work[0], work[1:ns], 1)
   334  			work[0] = 1
   335  			impl.Dlaset(blas.Lower, jw-2, jw-2, 0, 0, t[2*ldt:], ldt)
   336  			impl.Dlarf(blas.Left, ns, jw, work[:ns], 1, tau, t, ldt, work[jw:])
   337  			impl.Dlarf(blas.Right, ns, ns, work[:ns], 1, tau, t, ldt, work[jw:])
   338  			impl.Dlarf(blas.Right, jw, ns, work[:ns], 1, tau, v, ldv, work[jw:])
   339  			impl.Dgehrd(jw, 0, ns-1, t, ldt, work[:jw-1], work[jw:], lwork-jw)
   340  		}
   341  
   342  		// Copy updated reduced window into place.
   343  		if kwtop > 0 {
   344  			h[kwtop*ldh+kwtop-1] = s * v[0]
   345  		}
   346  		impl.Dlacpy(blas.Upper, jw, jw, t, ldt, h[kwtop*ldh+kwtop:], ldh)
   347  		bi.Dcopy(jw-1, t[ldt:], ldt+1, h[(kwtop+1)*ldh+kwtop:], ldh+1)
   348  
   349  		// Accumulate orthogonal matrix in order to update H and Z, if
   350  		// requested.
   351  		if ns > 1 && s != 0 {
   352  			// work[:ns-1] contains the elementary reflectors stored
   353  			// by a call to Dgehrd above.
   354  			impl.Dormhr(blas.Right, blas.NoTrans, jw, ns, 0, ns-1,
   355  				t, ldt, work[:ns-1], v, ldv, work[jw:], lwork-jw)
   356  		}
   357  
   358  		// Update vertical slab in H.
   359  		var ltop int
   360  		if !wantt {
   361  			ltop = ktop
   362  		}
   363  		for krow := ltop; krow < kwtop; krow += nv {
   364  			kln := min(nv, kwtop-krow)
   365  			bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
   366  				1, h[krow*ldh+kwtop:], ldh, v, ldv,
   367  				0, wv, ldwv)
   368  			impl.Dlacpy(blas.All, kln, jw, wv, ldwv, h[krow*ldh+kwtop:], ldh)
   369  		}
   370  
   371  		// Update horizontal slab in H.
   372  		if wantt {
   373  			for kcol := kbot + 1; kcol < n; kcol += nh {
   374  				kln := min(nh, n-kcol)
   375  				bi.Dgemm(blas.Trans, blas.NoTrans, jw, kln, jw,
   376  					1, v, ldv, h[kwtop*ldh+kcol:], ldh,
   377  					0, t, ldt)
   378  				impl.Dlacpy(blas.All, jw, kln, t, ldt, h[kwtop*ldh+kcol:], ldh)
   379  			}
   380  		}
   381  
   382  		// Update vertical slab in Z.
   383  		if wantz {
   384  			for krow := iloz; krow <= ihiz; krow += nv {
   385  				kln := min(nv, ihiz-krow+1)
   386  				bi.Dgemm(blas.NoTrans, blas.NoTrans, kln, jw, jw,
   387  					1, z[krow*ldz+kwtop:], ldz, v, ldv,
   388  					0, wv, ldwv)
   389  				impl.Dlacpy(blas.All, kln, jw, wv, ldwv, z[krow*ldz+kwtop:], ldz)
   390  			}
   391  		}
   392  	}
   393  
   394  	// The number of deflations.
   395  	nd = jw - ns
   396  	// Shifts are converged eigenvalues that could not be deflated.
   397  	// Subtracting infqr from the spike length takes care of the case of a
   398  	// rare QR failure while calculating eigenvalues of the deflation
   399  	// window.
   400  	ns -= infqr
   401  	work[0] = float64(lwkopt)
   402  	return ns, nd
   403  }