gonum.org/v1/gonum@v0.14.0/lapack/gonum/dlaqr04.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"math"
     9  
    10  	"gonum.org/v1/gonum/blas"
    11  )
    12  
    13  // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix
    14  // H, and optionally the matrices T and Z from the Schur decomposition
    15  //
    16  //	H = Z T Zᵀ
    17  //
    18  // where T is an upper quasi-triangular matrix (the Schur form), and Z is the
    19  // orthogonal matrix of Schur vectors.
    20  //
    21  // wantt indicates whether the full Schur form T is required. If wantt is false,
    22  // then only enough of H will be updated to preserve the eigenvalues.
    23  //
    24  // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it
    25  // is true, the orthogonal similarity transformation will be accumulated into
    26  // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced.
    27  //
    28  // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that
    29  //
    30  //	0 <= ilo <= ihi < n     if n > 0,
    31  //	ilo == 0 and ihi == -1  if n == 0,
    32  //
    33  // and the block must be isolated, that is,
    34  //
    35  //	ilo == 0   or H[ilo,ilo-1] == 0,
    36  //	ihi == n-1 or H[ihi+1,ihi] == 0,
    37  //
    38  // otherwise Dlaqr04 will panic.
    39  //
    40  // wr and wi must have length ihi+1.
    41  //
    42  // iloz and ihiz specify the rows of Z to which transformations will be applied
    43  // if wantz is true. It must hold that
    44  //
    45  //	0 <= iloz <= ilo,  and  ihi <= ihiz < n,
    46  //
    47  // otherwise Dlaqr04 will panic.
    48  //
    49  // work must have length at least lwork and lwork must be
    50  //
    51  //	lwork >= 1  if n <= 11,
    52  //	lwork >= n  if n > 11,
    53  //
    54  // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for
    55  // optimal performance. On return, work[0] will contain the optimal value of
    56  // lwork.
    57  //
    58  // If lwork is -1, instead of performing Dlaqr04, the function only estimates the
    59  // optimal workspace size and stores it into work[0]. Neither h nor z are
    60  // accessed.
    61  //
    62  // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves
    63  // as DLAQR0, for recur == 0 it behaves as DLAQR4.
    64  //
    65  // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1].
    66  //
    67  // If unconverged is zero and wantt is true, H will contain on return the upper
    68  // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks
    69  // (corresponding to complex conjugate pairs of eigenvalues) will be returned in
    70  // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0.
    71  //
    72  // If unconverged is zero and if wantt is false, the contents of h on return is
    73  // unspecified.
    74  //
    75  // If unconverged is zero, all the eigenvalues have been computed and their real
    76  // and imaginary parts will be stored on return in wr[ilo:ihi+1] and
    77  // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex
    78  // conjugate pair, they are stored in consecutive elements of wr and wi, say the
    79  // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the
    80  // eigenvalues are stored in the same order as on the diagonal of the Schur form
    81  // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal
    82  // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i].
    83  //
    84  // If unconverged is positive, some eigenvalues have not converged, and
    85  // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those
    86  // eigenvalues which have been successfully computed. Failures are rare.
    87  //
    88  // If unconverged is positive and wantt is true, then on return
    89  //
    90  //	(initial H)*U = U*(final H),   (*)
    91  //
    92  // where U is an orthogonal matrix. The final H is upper Hessenberg and
    93  // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
    94  //
    95  // If unconverged is positive and wantt is false, on return the remaining
    96  // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
    97  // H[ilo:unconverged,ilo:unconverged].
    98  //
    99  // If unconverged is positive and wantz is true, then on return
   100  //
   101  //	(final Z) = (initial Z)*U,
   102  //
   103  // where U is the orthogonal matrix in (*) regardless of the value of wantt.
   104  //
   105  // References:
   106  //
   107  //	[1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
   108  //	    Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
   109  //	    Anal. Appl. 23(4) (2002), pp. 929—947
   110  //	    URL: http://dx.doi.org/10.1137/S0895479801384573
   111  //	[2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
   112  //	    Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
   113  //	    URL: http://dx.doi.org/10.1137/S0895479801384585
   114  //
   115  // Dlaqr04 is an internal routine. It is exported for testing purposes.
   116  func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) {
   117  	const (
   118  		// Matrices of order ntiny or smaller must be processed by
   119  		// Dlahqr because of insufficient subdiagonal scratch space.
   120  		// This is a hard limit.
   121  		ntiny = 15
   122  		// Exceptional deflation windows: try to cure rare slow
   123  		// convergence by varying the size of the deflation window after
   124  		// kexnw iterations.
   125  		kexnw = 5
   126  		// Exceptional shifts: try to cure rare slow convergence with
   127  		// ad-hoc exceptional shifts every kexsh iterations.
   128  		kexsh = 6
   129  
   130  		// See https://github.com/gonum/lapack/pull/151#discussion_r68162802
   131  		// and the surrounding discussion for an explanation where these
   132  		// constants come from.
   133  		// TODO(vladimir-ch): Similar constants for exceptional shifts
   134  		// are used also in dlahqr.go. The first constant is different
   135  		// there, it is equal to 3. Why? And does it matter?
   136  		wilk1 = 0.75
   137  		wilk2 = -0.4375
   138  	)
   139  
   140  	switch {
   141  	case n < 0:
   142  		panic(nLT0)
   143  	case ilo < 0 || max(0, n-1) < ilo:
   144  		panic(badIlo)
   145  	case ihi < min(ilo, n-1) || n <= ihi:
   146  		panic(badIhi)
   147  	case ldh < max(1, n):
   148  		panic(badLdH)
   149  	case wantz && (iloz < 0 || ilo < iloz):
   150  		panic(badIloz)
   151  	case wantz && (ihiz < ihi || n <= ihiz):
   152  		panic(badIhiz)
   153  	case ldz < 1, wantz && ldz < n:
   154  		panic(badLdZ)
   155  	case lwork < 1 && lwork != -1:
   156  		panic(badLWork)
   157  	// TODO(vladimir-ch): Enable if and when we figure out what the minimum
   158  	// necessary lwork value is. Dlaqr04 says that the minimum is n which
   159  	// clashes with Dlaqr23's opinion about optimal work when nw <= 2
   160  	// (independent of n).
   161  	// case lwork < n && n > ntiny && lwork != -1:
   162  	// 	panic(badLWork)
   163  	case len(work) < max(1, lwork):
   164  		panic(shortWork)
   165  	case recur < 0:
   166  		panic(recurLT0)
   167  	}
   168  
   169  	// Quick return.
   170  	if n == 0 {
   171  		work[0] = 1
   172  		return 0
   173  	}
   174  
   175  	if lwork != -1 {
   176  		switch {
   177  		case len(h) < (n-1)*ldh+n:
   178  			panic(shortH)
   179  		case len(wr) != ihi+1:
   180  			panic(badLenWr)
   181  		case len(wi) != ihi+1:
   182  			panic(badLenWi)
   183  		case wantz && len(z) < (n-1)*ldz+n:
   184  			panic(shortZ)
   185  		case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
   186  			panic(notIsolated)
   187  		case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0:
   188  			panic(notIsolated)
   189  		}
   190  	}
   191  
   192  	if n <= ntiny {
   193  		// Tiny matrices must use Dlahqr.
   194  		if lwork == -1 {
   195  			work[0] = 1
   196  			return 0
   197  		}
   198  		return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz)
   199  	}
   200  
   201  	// Use small bulge multi-shift QR with aggressive early deflation on
   202  	// larger-than-tiny matrices.
   203  	var jbcmpz string
   204  	if wantt {
   205  		jbcmpz = "S"
   206  	} else {
   207  		jbcmpz = "E"
   208  	}
   209  	if wantz {
   210  		jbcmpz += "V"
   211  	} else {
   212  		jbcmpz += "N"
   213  	}
   214  
   215  	var fname string
   216  	if recur > 0 {
   217  		fname = "DLAQR0"
   218  	} else {
   219  		fname = "DLAQR4"
   220  	}
   221  	// nwr is the recommended deflation window size. n is greater than ntiny,
   222  	// so there is enough subdiagonal workspace for nwr >= 2 as required.
   223  	// (In fact, there is enough subdiagonal space for nwr >= 4.)
   224  	// TODO(vladimir-ch): If there is enough space for nwr >= 4, should we
   225  	// use it?
   226  	nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
   227  	nwr = max(2, nwr)
   228  	nwr = min(ihi-ilo+1, min((n-1)/3, nwr))
   229  
   230  	// nsr is the recommended number of simultaneous shifts. n is greater than
   231  	// ntiny, so there is enough subdiagonal workspace for nsr to be even and
   232  	// greater than or equal to two as required.
   233  	nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
   234  	nsr = min(nsr, min((n-3)/6, ihi-ilo))
   235  	nsr = max(2, nsr&^1)
   236  
   237  	// Workspace query call to Dlaqr23.
   238  	impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz, ihiz, z, ldz,
   239  		wr, wi, h, ldh, n, h, ldh, n, h, ldh, work, -1, recur)
   240  	// Optimal workspace is max(Dlaqr5, Dlaqr23).
   241  	lwkopt := max(3*nsr/2, int(work[0]))
   242  	// Quick return in case of workspace query.
   243  	if lwork == -1 {
   244  		work[0] = float64(lwkopt)
   245  		return 0
   246  	}
   247  
   248  	// Dlahqr/Dlaqr04 crossover point.
   249  	nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork)
   250  	nmin = max(ntiny, nmin)
   251  
   252  	// Nibble determines when to skip a multi-shift QR sweep (Dlaqr5).
   253  	nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork)
   254  	nibble = max(0, nibble)
   255  
   256  	// Computation mode of far-from-diagonal orthogonal updates in Dlaqr5.
   257  	kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork)
   258  	kacc22 = max(0, min(kacc22, 2))
   259  
   260  	// nwmax is the largest possible deflation window for which there is
   261  	// sufficient workspace.
   262  	nwmax := min((n-1)/3, lwork/2)
   263  	nw := nwmax // Start with maximum deflation window size.
   264  
   265  	// nsmax is the largest number of simultaneous shifts for which there is
   266  	// sufficient workspace.
   267  	nsmax := min((n-3)/6, 2*lwork/3) &^ 1
   268  
   269  	ndfl := 1 // Number of iterations since last deflation.
   270  	ndec := 0 // Deflation window size decrement.
   271  
   272  	// Main loop.
   273  	var (
   274  		itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1))
   275  		it    = 0
   276  	)
   277  	for kbot := ihi; kbot >= ilo; {
   278  		if it == itmax {
   279  			unconverged = kbot + 1
   280  			break
   281  		}
   282  		it++
   283  
   284  		// Locate active block.
   285  		ktop := ilo
   286  		for k := kbot; k >= ilo+1; k-- {
   287  			if h[k*ldh+k-1] == 0 {
   288  				ktop = k
   289  				break
   290  			}
   291  		}
   292  
   293  		// Select deflation window size nw.
   294  		//
   295  		// Typical Case:
   296  		//  If possible and advisable, nibble the entire active block.
   297  		//  If not, use size min(nwr,nwmax) or min(nwr+1,nwmax)
   298  		//  depending upon which has the smaller corresponding
   299  		//  subdiagonal entry (a heuristic).
   300  		//
   301  		// Exceptional Case:
   302  		//  If there have been no deflations in kexnw or more
   303  		//  iterations, then vary the deflation window size. At first,
   304  		//  because larger windows are, in general, more powerful than
   305  		//  smaller ones, rapidly increase the window to the maximum
   306  		//  possible. Then, gradually reduce the window size.
   307  		nh := kbot - ktop + 1
   308  		nwupbd := min(nh, nwmax)
   309  		if ndfl < kexnw {
   310  			nw = min(nwupbd, nwr)
   311  		} else {
   312  			nw = min(nwupbd, 2*nw)
   313  		}
   314  		if nw < nwmax {
   315  			if nw >= nh-1 {
   316  				nw = nh
   317  			} else {
   318  				kwtop := kbot - nw + 1
   319  				if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) {
   320  					nw++
   321  				}
   322  			}
   323  		}
   324  		if ndfl < kexnw {
   325  			ndec = -1
   326  		} else if ndec >= 0 || nw >= nwupbd {
   327  			ndec++
   328  			if nw-ndec < 2 {
   329  				ndec = 0
   330  			}
   331  			nw -= ndec
   332  		}
   333  
   334  		// Split workspace under the subdiagonal of H into:
   335  		//  - an nw×nw work array V in the lower left-hand corner,
   336  		//  - an nw×nhv horizontal work array along the bottom edge (nhv
   337  		//    must be at least nw but more is better),
   338  		//  - an nve×nw vertical work array along the left-hand-edge
   339  		//    (nhv can be any positive integer but more is better).
   340  		kv := n - nw
   341  		kt := nw
   342  		kwv := nw + 1
   343  		nhv := n - kwv - kt
   344  		// Aggressive early deflation.
   345  		ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
   346  			h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
   347  			h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur)
   348  
   349  		// Adjust kbot accounting for new deflations.
   350  		kbot -= ld
   351  		// ks points to the shifts.
   352  		ks := kbot - ls + 1
   353  
   354  		// Skip an expensive QR sweep if there is a (partly heuristic)
   355  		// reason to expect that many eigenvalues will deflate without
   356  		// it. Here, the QR sweep is skipped if many eigenvalues have
   357  		// just been deflated or if the remaining active block is small.
   358  		if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) {
   359  			// ld is positive, note progress.
   360  			ndfl = 1
   361  			continue
   362  		}
   363  
   364  		// ns is the nominal number of simultaneous shifts. This may be
   365  		// lowered (slightly) if Dlaqr23 did not provide that many
   366  		// shifts.
   367  		ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
   368  
   369  		// If there have been no deflations in a multiple of kexsh
   370  		// iterations, then try exceptional shifts. Otherwise use shifts
   371  		// provided by Dlaqr23 above or from the eigenvalues of a
   372  		// trailing principal submatrix.
   373  		if ndfl%kexsh == 0 {
   374  			ks = kbot - ns + 1
   375  			for i := kbot; i > max(ks, ktop+1); i -= 2 {
   376  				ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
   377  				aa := wilk1*ss + h[i*ldh+i]
   378  				_, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ =
   379  					impl.Dlanv2(aa, ss, wilk2*ss, aa)
   380  			}
   381  			if ks == ktop {
   382  				wr[ks+1] = h[(ks+1)*ldh+ks+1]
   383  				wi[ks+1] = 0
   384  				wr[ks] = wr[ks+1]
   385  				wi[ks] = wi[ks+1]
   386  			}
   387  		} else {
   388  			// If we got ns/2 or fewer shifts, use Dlahqr or recur
   389  			// into Dlaqr04 on a trailing principal submatrix to get
   390  			// more. Since ns <= nsmax <=(n+6)/9, there is enough
   391  			// space below the subdiagonal to fit an ns×ns scratch
   392  			// array.
   393  			if kbot-ks+1 <= ns/2 {
   394  				ks = kbot - ns + 1
   395  				kt = n - ns
   396  				impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh)
   397  				if ns > nmin && recur > 0 {
   398  					ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh,
   399  						wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1)
   400  				} else {
   401  					ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
   402  						wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 1)
   403  				}
   404  				// In case of a rare QR failure use eigenvalues
   405  				// of the trailing 2×2 principal submatrix.
   406  				if ks >= kbot {
   407  					aa := h[(kbot-1)*ldh+kbot-1]
   408  					bb := h[(kbot-1)*ldh+kbot]
   409  					cc := h[kbot*ldh+kbot-1]
   410  					dd := h[kbot*ldh+kbot]
   411  					_, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ =
   412  						impl.Dlanv2(aa, bb, cc, dd)
   413  					ks = kbot - 1
   414  				}
   415  			}
   416  
   417  			if kbot-ks+1 > ns {
   418  				// Sorting the shifts helps a little. Bubble
   419  				// sort keeps complex conjugate pairs together.
   420  				sorted := false
   421  				for k := kbot; k > ks; k-- {
   422  					if sorted {
   423  						break
   424  					}
   425  					sorted = true
   426  					for i := ks; i < k; i++ {
   427  						if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) {
   428  							continue
   429  						}
   430  						sorted = false
   431  						wr[i], wr[i+1] = wr[i+1], wr[i]
   432  						wi[i], wi[i+1] = wi[i+1], wi[i]
   433  					}
   434  				}
   435  			}
   436  
   437  			// Shuffle shifts into pairs of real shifts and pairs of
   438  			// complex conjugate shifts using the fact that complex
   439  			// conjugate shifts are already adjacent to one another.
   440  			// TODO(vladimir-ch): The shuffling here could probably
   441  			// be removed but I'm not sure right now and it's safer
   442  			// to leave it.
   443  			for i := kbot; i > ks+1; i -= 2 {
   444  				if wi[i] == -wi[i-1] {
   445  					continue
   446  				}
   447  				wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i]
   448  				wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i]
   449  			}
   450  		}
   451  
   452  		// If there are only two shifts and both are real, then use only one.
   453  		if kbot-ks+1 == 2 && wi[kbot] == 0 {
   454  			if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) {
   455  				wr[kbot-1] = wr[kbot]
   456  			} else {
   457  				wr[kbot] = wr[kbot-1]
   458  			}
   459  		}
   460  
   461  		// Use up to ns of the smallest magnitude shifts. If there
   462  		// aren't ns shifts available, then use them all, possibly
   463  		// dropping one to make the number of shifts even.
   464  		ns = min(ns, kbot-ks+1) &^ 1
   465  		ks = kbot - ns + 1
   466  
   467  		// Split workspace under the subdiagonal into:
   468  		// - a kdu×kdu work array U in the lower left-hand-corner,
   469  		// - a kdu×nhv horizontal work array WH along the bottom edge
   470  		//   (nhv must be at least kdu but more is better),
   471  		// - an nhv×kdu vertical work array WV along the left-hand-edge
   472  		//   (nhv must be at least kdu but more is better).
   473  		kdu := 2 * ns
   474  		ku := n - kdu
   475  		kwh := kdu
   476  		kwv = kdu + 3
   477  		nhv = n - kwv - kdu
   478  		// Small-bulge multi-shift QR sweep.
   479  		impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns,
   480  			wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz,
   481  			work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh)
   482  
   483  		// Note progress (or the lack of it).
   484  		if ld > 0 {
   485  			ndfl = 1
   486  		} else {
   487  			ndfl++
   488  		}
   489  	}
   490  
   491  	work[0] = float64(lwkopt)
   492  	return unconverged
   493  }