github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dlaqr04.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/jingcheng-WU/gonum/blas"
    11  )
    12  
    13  // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix
    14  // H, and optionally the matrices T and Z from the Schur decomposition
    15  //  H = Z T Zᵀ
    16  // where T is an upper quasi-triangular matrix (the Schur form), and Z is the
    17  // orthogonal matrix of Schur vectors.
    18  //
    19  // wantt indicates whether the full Schur form T is required. If wantt is false,
    20  // then only enough of H will be updated to preserve the eigenvalues.
    21  //
    22  // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it
    23  // is true, the orthogonal similarity transformation will be accumulated into
    24  // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced.
    25  //
    26  // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that
    27  //  0 <= ilo <= ihi < n     if n > 0,
    28  //  ilo == 0 and ihi == -1  if n == 0,
    29  // and the block must be isolated, that is,
    30  //  ilo == 0   or H[ilo,ilo-1] == 0,
    31  //  ihi == n-1 or H[ihi+1,ihi] == 0,
    32  // otherwise Dlaqr04 will panic.
    33  //
    34  // wr and wi must have length ihi+1.
    35  //
    36  // iloz and ihiz specify the rows of Z to which transformations will be applied
    37  // if wantz is true. It must hold that
    38  //  0 <= iloz <= ilo,  and  ihi <= ihiz < n,
    39  // otherwise Dlaqr04 will panic.
    40  //
    41  // work must have length at least lwork and lwork must be
    42  //  lwork >= 1  if n <= 11,
    43  //  lwork >= n  if n > 11,
    44  // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for
    45  // optimal performance. On return, work[0] will contain the optimal value of
    46  // lwork.
    47  //
    48  // If lwork is -1, instead of performing Dlaqr04, the function only estimates the
    49  // optimal workspace size and stores it into work[0]. Neither h nor z are
    50  // accessed.
    51  //
    52  // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves
    53  // as DLAQR0, for recur == 0 it behaves as DLAQR4.
    54  //
    55  // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1].
    56  //
    57  // If unconverged is zero and wantt is true, H will contain on return the upper
    58  // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks
    59  // (corresponding to complex conjugate pairs of eigenvalues) will be returned in
    60  // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0.
    61  //
    62  // If unconverged is zero and if wantt is false, the contents of h on return is
    63  // unspecified.
    64  //
    65  // If unconverged is zero, all the eigenvalues have been computed and their real
    66  // and imaginary parts will be stored on return in wr[ilo:ihi+1] and
    67  // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex
    68  // conjugate pair, they are stored in consecutive elements of wr and wi, say the
    69  // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the
    70  // eigenvalues are stored in the same order as on the diagonal of the Schur form
    71  // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal
    72  // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i].
    73  //
    74  // If unconverged is positive, some eigenvalues have not converged, and
    75  // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those
    76  // eigenvalues which have been successfully computed. Failures are rare.
    77  //
    78  // If unconverged is positive and wantt is true, then on return
    79  //  (initial H)*U = U*(final H),   (*)
    80  // where U is an orthogonal matrix. The final H is upper Hessenberg and
    81  // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
    82  //
    83  // If unconverged is positive and wantt is false, on return the remaining
    84  // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
    85  // H[ilo:unconverged,ilo:unconverged].
    86  //
    87  // If unconverged is positive and wantz is true, then on return
    88  //  (final Z) = (initial Z)*U,
    89  // where U is the orthogonal matrix in (*) regardless of the value of wantt.
    90  //
    91  // References:
    92  //  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
    93  //      Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
    94  //      Anal. Appl. 23(4) (2002), pp. 929—947
    95  //      URL: http://dx.doi.org/10.1137/S0895479801384573
    96  //  [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
    97  //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
    98  //      URL: http://dx.doi.org/10.1137/S0895479801384585
    99  //
   100  // Dlaqr04 is an internal routine. It is exported for testing purposes.
   101  func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) {
   102  	const (
   103  		// Matrices of order ntiny or smaller must be processed by
   104  		// Dlahqr because of insufficient subdiagonal scratch space.
   105  		// This is a hard limit.
   106  		ntiny = 11
   107  		// Exceptional deflation windows: try to cure rare slow
   108  		// convergence by varying the size of the deflation window after
   109  		// kexnw iterations.
   110  		kexnw = 5
   111  		// Exceptional shifts: try to cure rare slow convergence with
   112  		// ad-hoc exceptional shifts every kexsh iterations.
   113  		kexsh = 6
   114  
   115  		// See https://github.com/gonum/lapack/pull/151#discussion_r68162802
   116  		// and the surrounding discussion for an explanation where these
   117  		// constants come from.
   118  		// TODO(vladimir-ch): Similar constants for exceptional shifts
   119  		// are used also in dlahqr.go. The first constant is different
   120  		// there, it is equal to 3. Why? And does it matter?
   121  		wilk1 = 0.75
   122  		wilk2 = -0.4375
   123  	)
   124  
   125  	switch {
   126  	case n < 0:
   127  		panic(nLT0)
   128  	case ilo < 0 || max(0, n-1) < ilo:
   129  		panic(badIlo)
   130  	case ihi < min(ilo, n-1) || n <= ihi:
   131  		panic(badIhi)
   132  	case ldh < max(1, n):
   133  		panic(badLdH)
   134  	case wantz && (iloz < 0 || ilo < iloz):
   135  		panic(badIloz)
   136  	case wantz && (ihiz < ihi || n <= ihiz):
   137  		panic(badIhiz)
   138  	case ldz < 1, wantz && ldz < n:
   139  		panic(badLdZ)
   140  	case lwork < 1 && lwork != -1:
   141  		panic(badLWork)
   142  	// TODO(vladimir-ch): Enable if and when we figure out what the minimum
   143  	// necessary lwork value is. Dlaqr04 says that the minimum is n which
   144  	// clashes with Dlaqr23's opinion about optimal work when nw <= 2
   145  	// (independent of n).
   146  	// case lwork < n && n > ntiny && lwork != -1:
   147  	// 	panic(badLWork)
   148  	case len(work) < max(1, lwork):
   149  		panic(shortWork)
   150  	case recur < 0:
   151  		panic(recurLT0)
   152  	}
   153  
   154  	// Quick return.
   155  	if n == 0 {
   156  		work[0] = 1
   157  		return 0
   158  	}
   159  
   160  	if lwork != -1 {
   161  		switch {
   162  		case len(h) < (n-1)*ldh+n:
   163  			panic(shortH)
   164  		case len(wr) != ihi+1:
   165  			panic(badLenWr)
   166  		case len(wi) != ihi+1:
   167  			panic(badLenWi)
   168  		case wantz && len(z) < (n-1)*ldz+n:
   169  			panic(shortZ)
   170  		case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
   171  			panic(notIsolated)
   172  		case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0:
   173  			panic(notIsolated)
   174  		}
   175  	}
   176  
   177  	if n <= ntiny {
   178  		// Tiny matrices must use Dlahqr.
   179  		if lwork == -1 {
   180  			work[0] = 1
   181  			return 0
   182  		}
   183  		return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz)
   184  	}
   185  
   186  	// Use small bulge multi-shift QR with aggressive early deflation on
   187  	// larger-than-tiny matrices.
   188  	var jbcmpz string
   189  	if wantt {
   190  		jbcmpz = "S"
   191  	} else {
   192  		jbcmpz = "E"
   193  	}
   194  	if wantz {
   195  		jbcmpz += "V"
   196  	} else {
   197  		jbcmpz += "N"
   198  	}
   199  
   200  	var fname string
   201  	if recur > 0 {
   202  		fname = "DLAQR0"
   203  	} else {
   204  		fname = "DLAQR4"
   205  	}
   206  	// nwr is the recommended deflation window size. n is greater than 11,
   207  	// so there is enough subdiagonal workspace for nwr >= 2 as required.
   208  	// (In fact, there is enough subdiagonal space for nwr >= 3.)
   209  	// TODO(vladimir-ch): If there is enough space for nwr >= 3, should we
   210  	// use it?
   211  	nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
   212  	nwr = max(2, nwr)
   213  	nwr = min(ihi-ilo+1, min((n-1)/3, nwr))
   214  
   215  	// nsr is the recommended number of simultaneous shifts. n is greater
   216  	// than 11, so there is enough subdiagonal workspace for nsr to be even
   217  	// and greater than or equal to two as required.
   218  	nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
   219  	nsr = min(nsr, min((n+6)/9, ihi-ilo))
   220  	nsr = max(2, nsr&^1)
   221  
   222  	// Workspace query call to Dlaqr23.
   223  	impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, h, ldh, iloz, ihiz, z, ldz,
   224  		wr, wi, h, ldh, n, h, ldh, n, h, ldh, work, -1, recur)
   225  	// Optimal workspace is max(Dlaqr5, Dlaqr23).
   226  	lwkopt := max(3*nsr/2, int(work[0]))
   227  	// Quick return in case of workspace query.
   228  	if lwork == -1 {
   229  		work[0] = float64(lwkopt)
   230  		return 0
   231  	}
   232  
   233  	// Dlahqr/Dlaqr04 crossover point.
   234  	nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork)
   235  	nmin = max(ntiny, nmin)
   236  
   237  	// Nibble determines when to skip a multi-shift QR sweep (Dlaqr5).
   238  	nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork)
   239  	nibble = max(0, nibble)
   240  
   241  	// Computation mode of far-from-diagonal orthogonal updates in Dlaqr5.
   242  	kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork)
   243  	kacc22 = max(0, min(kacc22, 2))
   244  
   245  	// nwmax is the largest possible deflation window for which there is
   246  	// sufficient workspace.
   247  	nwmax := min((n-1)/3, lwork/2)
   248  	nw := nwmax // Start with maximum deflation window size.
   249  
   250  	// nsmax is the largest number of simultaneous shifts for which there is
   251  	// sufficient workspace.
   252  	nsmax := min((n+6)/9, 2*lwork/3) &^ 1
   253  
   254  	ndfl := 1 // Number of iterations since last deflation.
   255  	ndec := 0 // Deflation window size decrement.
   256  
   257  	// Main loop.
   258  	var (
   259  		itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1))
   260  		it    = 0
   261  	)
   262  	for kbot := ihi; kbot >= ilo; {
   263  		if it == itmax {
   264  			unconverged = kbot + 1
   265  			break
   266  		}
   267  		it++
   268  
   269  		// Locate active block.
   270  		ktop := ilo
   271  		for k := kbot; k >= ilo+1; k-- {
   272  			if h[k*ldh+k-1] == 0 {
   273  				ktop = k
   274  				break
   275  			}
   276  		}
   277  
   278  		// Select deflation window size nw.
   279  		//
   280  		// Typical Case:
   281  		//  If possible and advisable, nibble the entire active block.
   282  		//  If not, use size min(nwr,nwmax) or min(nwr+1,nwmax)
   283  		//  depending upon which has the smaller corresponding
   284  		//  subdiagonal entry (a heuristic).
   285  		//
   286  		// Exceptional Case:
   287  		//  If there have been no deflations in kexnw or more
   288  		//  iterations, then vary the deflation window size. At first,
   289  		//  because larger windows are, in general, more powerful than
   290  		//  smaller ones, rapidly increase the window to the maximum
   291  		//  possible. Then, gradually reduce the window size.
   292  		nh := kbot - ktop + 1
   293  		nwupbd := min(nh, nwmax)
   294  		if ndfl < kexnw {
   295  			nw = min(nwupbd, nwr)
   296  		} else {
   297  			nw = min(nwupbd, 2*nw)
   298  		}
   299  		if nw < nwmax {
   300  			if nw >= nh-1 {
   301  				nw = nh
   302  			} else {
   303  				kwtop := kbot - nw + 1
   304  				if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) {
   305  					nw++
   306  				}
   307  			}
   308  		}
   309  		if ndfl < kexnw {
   310  			ndec = -1
   311  		} else if ndec >= 0 || nw >= nwupbd {
   312  			ndec++
   313  			if nw-ndec < 2 {
   314  				ndec = 0
   315  			}
   316  			nw -= ndec
   317  		}
   318  
   319  		// Split workspace under the subdiagonal of H into:
   320  		//  - an nw×nw work array V in the lower left-hand corner,
   321  		//  - an nw×nhv horizontal work array along the bottom edge (nhv
   322  		//    must be at least nw but more is better),
   323  		//  - an nve×nw vertical work array along the left-hand-edge
   324  		//    (nhv can be any positive integer but more is better).
   325  		kv := n - nw
   326  		kt := nw
   327  		kwv := nw + 1
   328  		nhv := n - kwv - kt
   329  		// Aggressive early deflation.
   330  		ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
   331  			h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
   332  			h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur)
   333  
   334  		// Adjust kbot accounting for new deflations.
   335  		kbot -= ld
   336  		// ks points to the shifts.
   337  		ks := kbot - ls + 1
   338  
   339  		// Skip an expensive QR sweep if there is a (partly heuristic)
   340  		// reason to expect that many eigenvalues will deflate without
   341  		// it. Here, the QR sweep is skipped if many eigenvalues have
   342  		// just been deflated or if the remaining active block is small.
   343  		if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) {
   344  			// ld is positive, note progress.
   345  			ndfl = 1
   346  			continue
   347  		}
   348  
   349  		// ns is the nominal number of simultaneous shifts. This may be
   350  		// lowered (slightly) if Dlaqr23 did not provide that many
   351  		// shifts.
   352  		ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
   353  
   354  		// If there have been no deflations in a multiple of kexsh
   355  		// iterations, then try exceptional shifts. Otherwise use shifts
   356  		// provided by Dlaqr23 above or from the eigenvalues of a
   357  		// trailing principal submatrix.
   358  		if ndfl%kexsh == 0 {
   359  			ks = kbot - ns + 1
   360  			for i := kbot; i > max(ks, ktop+1); i -= 2 {
   361  				ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
   362  				aa := wilk1*ss + h[i*ldh+i]
   363  				_, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ =
   364  					impl.Dlanv2(aa, ss, wilk2*ss, aa)
   365  			}
   366  			if ks == ktop {
   367  				wr[ks+1] = h[(ks+1)*ldh+ks+1]
   368  				wi[ks+1] = 0
   369  				wr[ks] = wr[ks+1]
   370  				wi[ks] = wi[ks+1]
   371  			}
   372  		} else {
   373  			// If we got ns/2 or fewer shifts, use Dlahqr or recur
   374  			// into Dlaqr04 on a trailing principal submatrix to get
   375  			// more. Since ns <= nsmax <=(n+6)/9, there is enough
   376  			// space below the subdiagonal to fit an ns×ns scratch
   377  			// array.
   378  			if kbot-ks+1 <= ns/2 {
   379  				ks = kbot - ns + 1
   380  				kt = n - ns
   381  				impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh)
   382  				if ns > nmin && recur > 0 {
   383  					ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh,
   384  						wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1)
   385  				} else {
   386  					ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
   387  						wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 1)
   388  				}
   389  				// In case of a rare QR failure use eigenvalues
   390  				// of the trailing 2×2 principal submatrix.
   391  				if ks >= kbot {
   392  					aa := h[(kbot-1)*ldh+kbot-1]
   393  					bb := h[(kbot-1)*ldh+kbot]
   394  					cc := h[kbot*ldh+kbot-1]
   395  					dd := h[kbot*ldh+kbot]
   396  					_, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ =
   397  						impl.Dlanv2(aa, bb, cc, dd)
   398  					ks = kbot - 1
   399  				}
   400  			}
   401  
   402  			if kbot-ks+1 > ns {
   403  				// Sorting the shifts helps a little. Bubble
   404  				// sort keeps complex conjugate pairs together.
   405  				sorted := false
   406  				for k := kbot; k > ks; k-- {
   407  					if sorted {
   408  						break
   409  					}
   410  					sorted = true
   411  					for i := ks; i < k; i++ {
   412  						if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) {
   413  							continue
   414  						}
   415  						sorted = false
   416  						wr[i], wr[i+1] = wr[i+1], wr[i]
   417  						wi[i], wi[i+1] = wi[i+1], wi[i]
   418  					}
   419  				}
   420  			}
   421  
   422  			// Shuffle shifts into pairs of real shifts and pairs of
   423  			// complex conjugate shifts using the fact that complex
   424  			// conjugate shifts are already adjacent to one another.
   425  			// TODO(vladimir-ch): The shuffling here could probably
   426  			// be removed but I'm not sure right now and it's safer
   427  			// to leave it.
   428  			for i := kbot; i > ks+1; i -= 2 {
   429  				if wi[i] == -wi[i-1] {
   430  					continue
   431  				}
   432  				wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i]
   433  				wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i]
   434  			}
   435  		}
   436  
   437  		// If there are only two shifts and both are real, then use only one.
   438  		if kbot-ks+1 == 2 && wi[kbot] == 0 {
   439  			if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) {
   440  				wr[kbot-1] = wr[kbot]
   441  			} else {
   442  				wr[kbot] = wr[kbot-1]
   443  			}
   444  		}
   445  
   446  		// Use up to ns of the smallest magnitude shifts. If there
   447  		// aren't ns shifts available, then use them all, possibly
   448  		// dropping one to make the number of shifts even.
   449  		ns = min(ns, kbot-ks+1) &^ 1
   450  		ks = kbot - ns + 1
   451  
   452  		// Split workspace under the subdiagonal into:
   453  		// - a kdu×kdu work array U in the lower left-hand-corner,
   454  		// - a kdu×nhv horizontal work array WH along the bottom edge
   455  		//   (nhv must be at least kdu but more is better),
   456  		// - an nhv×kdu vertical work array WV along the left-hand-edge
   457  		//   (nhv must be at least kdu but more is better).
   458  		kdu := 3*ns - 3
   459  		ku := n - kdu
   460  		kwh := kdu
   461  		kwv = kdu + 3
   462  		nhv = n - kwv - kdu
   463  		// Small-bulge multi-shift QR sweep.
   464  		impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns,
   465  			wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz,
   466  			work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh)
   467  
   468  		// Note progress (or the lack of it).
   469  		if ld > 0 {
   470  			ndfl = 1
   471  		} else {
   472  			ndfl++
   473  		}
   474  	}
   475  
   476  	work[0] = float64(lwkopt)
   477  	return unconverged
   478  }