github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dlaqr04.go

github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dlaqr04.go (about)

     1  // Copyright ©2016 The gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package native
     6  
     7  import (
     8  	"math"
     9  
    10  	"github.com/gonum/blas"
    11  )
    12  
    13  // Dlaqr04 computes the eigenvalues of a block of an n×n upper Hessenberg matrix
    14  // H, and optionally the matrices T and Z from the Schur decomposition
    15  //  H = Z T Z^T
    16  // where T is an upper quasi-triangular matrix (the Schur form), and Z is the
    17  // orthogonal matrix of Schur vectors.
    18  //
    19  // wantt indicates whether the full Schur form T is required. If wantt is false,
    20  // then only enough of H will be updated to preserve the eigenvalues.
    21  //
    22  // wantz indicates whether the n×n matrix of Schur vectors Z is required. If it
    23  // is true, the orthogonal similarity transformation will be accumulated into
    24  // Z[iloz:ihiz+1,ilo:ihi+1], otherwise Z will not be referenced.
    25  //
    26  // ilo and ihi determine the block of H on which Dlaqr04 operates. It must hold that
    27  //  0 <= ilo <= ihi < n,     if n > 0,
    28  //  ilo == 0 and ihi == -1,  if n == 0,
    29  // and the block must be isolated, that is,
    30  //  ilo == 0   or H[ilo,ilo-1] == 0,
    31  //  ihi == n-1 or H[ihi+1,ihi] == 0,
    32  // otherwise Dlaqr04 will panic.
    33  //
    34  // wr and wi must have length ihi+1.
    35  //
    36  // iloz and ihiz specify the rows of Z to which transformations will be applied
    37  // if wantz is true. It must hold that
    38  //  0 <= iloz <= ilo,  and  ihi <= ihiz < n,
    39  // otherwise Dlaqr04 will panic.
    40  //
    41  // work must have length at least lwork and lwork must be
    42  //  lwork >= 1,  if n <= 11,
    43  //  lwork >= n,  if n > 11,
    44  // otherwise Dlaqr04 will panic. lwork as large as 6*n may be required for
    45  // optimal performance. On return, work[0] will contain the optimal value of
    46  // lwork.
    47  //
    48  // If lwork is -1, instead of performing Dlaqr04, the function only estimates the
    49  // optimal workspace size and stores it into work[0]. Neither h nor z are
    50  // accessed.
    51  //
    52  // recur is the non-negative recursion depth. For recur > 0, Dlaqr04 behaves
    53  // as DLAQR0, for recur == 0 it behaves as DLAQR4.
    54  //
    55  // unconverged indicates whether Dlaqr04 computed all the eigenvalues of H[ilo:ihi+1,ilo:ihi+1].
    56  //
    57  // If unconverged is zero and wantt is true, H will contain on return the upper
    58  // quasi-triangular matrix T from the Schur decomposition. 2×2 diagonal blocks
    59  // (corresponding to complex conjugate pairs of eigenvalues) will be returned in
    60  // standard form, with H[i,i] == H[i+1,i+1] and H[i+1,i]*H[i,i+1] < 0.
    61  //
    62  // If unconverged is zero and if wantt is false, the contents of h on return is
    63  // unspecified.
    64  //
    65  // If unconverged is zero, all the eigenvalues have been computed and their real
    66  // and imaginary parts will be stored on return in wr[ilo:ihi+1] and
    67  // wi[ilo:ihi+1], respectively. If two eigenvalues are computed as a complex
    68  // conjugate pair, they are stored in consecutive elements of wr and wi, say the
    69  // i-th and (i+1)th, with wi[i] > 0 and wi[i+1] < 0. If wantt is true, then the
    70  // eigenvalues are stored in the same order as on the diagonal of the Schur form
    71  // returned in H, with wr[i] = H[i,i] and, if H[i:i+2,i:i+2] is a 2×2 diagonal
    72  // block, wi[i] = sqrt(-H[i+1,i]*H[i,i+1]) and wi[i+1] = -wi[i].
    73  //
    74  // If unconverged is positive, some eigenvalues have not converged, and
    75  // wr[unconverged:ihi+1] and wi[unconverged:ihi+1] will contain those
    76  // eigenvalues which have been successfully computed. Failures are rare.
    77  //
    78  // If unconverged is positive and wantt is true, then on return
    79  //  (initial H)*U = U*(final H),   (*)
    80  // where U is an orthogonal matrix. The final H is upper Hessenberg and
    81  // H[unconverged:ihi+1,unconverged:ihi+1] is upper quasi-triangular.
    82  //
    83  // If unconverged is positive and wantt is false, on return the remaining
    84  // unconverged eigenvalues are the eigenvalues of the upper Hessenberg matrix
    85  // H[ilo:unconverged,ilo:unconverged].
    86  //
    87  // If unconverged is positive and wantz is true, then on return
    88  //  (final Z) = (initial Z)*U,
    89  // where U is the orthogonal matrix in (*) regardless of the value of wantt.
    90  //
    91  // References:
    92  //  [1] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part I:
    93  //      Maintaining Well-Focused Shifts and Level 3 Performance. SIAM J. Matrix
    94  //      Anal. Appl. 23(4) (2002), pp. 929—947
    95  //      URL: http://dx.doi.org/10.1137/S0895479801384573
    96  //  [2] K. Braman, R. Byers, R. Mathias. The Multishift QR Algorithm. Part II:
    97  //      Aggressive Early Deflation. SIAM J. Matrix Anal. Appl. 23(4) (2002), pp. 948—973
    98  //      URL: http://dx.doi.org/10.1137/S0895479801384585
    99  //
   100  // Dlaqr04 is an internal routine. It is exported for testing purposes.
   101  func (impl Implementation) Dlaqr04(wantt, wantz bool, n, ilo, ihi int, h []float64, ldh int, wr, wi []float64, iloz, ihiz int, z []float64, ldz int, work []float64, lwork int, recur int) (unconverged int) {
   102  	const (
   103  		// Matrices of order ntiny or smaller must be processed by
   104  		// Dlahqr because of insufficient subdiagonal scratch space.
   105  		// This is a hard limit.
   106  		ntiny = 11
   107  		// Exceptional deflation windows: try to cure rare slow
   108  		// convergence by varying the size of the deflation window after
   109  		// kexnw iterations.
   110  		kexnw = 5
   111  		// Exceptional shifts: try to cure rare slow convergence with
   112  		// ad-hoc exceptional shifts every kexsh iterations.
   113  		kexsh = 6
   114  
   115  		// See https://github.com/gonum/lapack/pull/151#discussion_r68162802
   116  		// and the surrounding discussion for an explanation where these
   117  		// constants come from.
   118  		// TODO(vladimir-ch): Similar constants for exceptional shifts
   119  		// are used also in dlahqr.go. The first constant is different
   120  		// there, it is equal to 3. Why? And does it matter?
   121  		wilk1 = 0.75
   122  		wilk2 = -0.4375
   123  	)
   124  
   125  	switch {
   126  	case ilo < 0 || max(0, n-1) < ilo:
   127  		panic(badIlo)
   128  	case ihi < min(ilo, n-1) || n <= ihi:
   129  		panic(badIhi)
   130  	case lwork < 1 && n <= ntiny && lwork != -1:
   131  		panic(badWork)
   132  	// TODO(vladimir-ch): Enable if and when we figure out what the minimum
   133  	// necessary lwork value is. Dlaqr04 says that the minimum is n which
   134  	// clashes with Dlaqr23's opinion about optimal work when nw <= 2
   135  	// (independent of n).
   136  	// case lwork < n && n > ntiny && lwork != -1:
   137  	// 	panic(badWork)
   138  	case len(work) < lwork:
   139  		panic(shortWork)
   140  	case recur < 0:
   141  		panic("lapack: recur is negative")
   142  	}
   143  	if wantz {
   144  		if iloz < 0 || ilo < iloz {
   145  			panic("lapack: invalid value of iloz")
   146  		}
   147  		if ihiz < ihi || n <= ihiz {
   148  			panic("lapack: invalid value of ihiz")
   149  		}
   150  	}
   151  	if lwork != -1 {
   152  		checkMatrix(n, n, h, ldh)
   153  		if wantz {
   154  			checkMatrix(n, n, z, ldz)
   155  		}
   156  		switch {
   157  		case ilo > 0 && h[ilo*ldh+ilo-1] != 0:
   158  			panic("lapack: block not isolated")
   159  		case ihi+1 < n && h[(ihi+1)*ldh+ihi] != 0:
   160  			panic("lapack: block not isolated")
   161  		case len(wr) != ihi+1:
   162  			panic("lapack: bad length of wr")
   163  		case len(wi) != ihi+1:
   164  			panic("lapack: bad length of wi")
   165  		}
   166  	}
   167  
   168  	// Quick return.
   169  	if n == 0 {
   170  		work[0] = 1
   171  		return 0
   172  	}
   173  
   174  	if n <= ntiny {
   175  		// Tiny matrices must use Dlahqr.
   176  		work[0] = 1
   177  		if lwork == -1 {
   178  			return 0
   179  		}
   180  		return impl.Dlahqr(wantt, wantz, n, ilo, ihi, h, ldh, wr, wi, iloz, ihiz, z, ldz)
   181  	}
   182  
   183  	// Use small bulge multi-shift QR with aggressive early deflation on
   184  	// larger-than-tiny matrices.
   185  	var jbcmpz string
   186  	if wantt {
   187  		jbcmpz = "S"
   188  	} else {
   189  		jbcmpz = "E"
   190  	}
   191  	if wantz {
   192  		jbcmpz += "V"
   193  	} else {
   194  		jbcmpz += "N"
   195  	}
   196  
   197  	var fname string
   198  	if recur > 0 {
   199  		fname = "DLAQR0"
   200  	} else {
   201  		fname = "DLAQR4"
   202  	}
   203  	// nwr is the recommended deflation window size. n is greater than 11,
   204  	// so there is enough subdiagonal workspace for nwr >= 2 as required.
   205  	// (In fact, there is enough subdiagonal space for nwr >= 3.)
   206  	// TODO(vladimir-ch): If there is enough space for nwr >= 3, should we
   207  	// use it?
   208  	nwr := impl.Ilaenv(13, fname, jbcmpz, n, ilo, ihi, lwork)
   209  	nwr = max(2, nwr)
   210  	nwr = min(ihi-ilo+1, min((n-1)/3, nwr))
   211  
   212  	// nsr is the recommended number of simultaneous shifts. n is greater
   213  	// than 11, so there is enough subdiagonal workspace for nsr to be even
   214  	// and greater than or equal to two as required.
   215  	nsr := impl.Ilaenv(15, fname, jbcmpz, n, ilo, ihi, lwork)
   216  	nsr = min(nsr, min((n+6)/9, ihi-ilo))
   217  	nsr = max(2, nsr&^1)
   218  
   219  	// Workspace query call to Dlaqr23.
   220  	impl.Dlaqr23(wantt, wantz, n, ilo, ihi, nwr+1, nil, 0, iloz, ihiz, nil, 0,
   221  		nil, nil, nil, 0, n, nil, 0, n, nil, 0, work, -1, recur)
   222  	// Optimal workspace is max(Dlaqr5, Dlaqr23).
   223  	lwkopt := max(3*nsr/2, int(work[0]))
   224  	// Quick return in case of workspace query.
   225  	if lwork == -1 {
   226  		work[0] = float64(lwkopt)
   227  		return 0
   228  	}
   229  
   230  	// Dlahqr/Dlaqr04 crossover point.
   231  	nmin := impl.Ilaenv(12, fname, jbcmpz, n, ilo, ihi, lwork)
   232  	nmin = max(ntiny, nmin)
   233  
   234  	// Nibble determines when to skip a multi-shift QR sweep (Dlaqr5).
   235  	nibble := impl.Ilaenv(14, fname, jbcmpz, n, ilo, ihi, lwork)
   236  	nibble = max(0, nibble)
   237  
   238  	// Computation mode of far-from-diagonal orthogonal updates in Dlaqr5.
   239  	kacc22 := impl.Ilaenv(16, fname, jbcmpz, n, ilo, ihi, lwork)
   240  	kacc22 = max(0, min(kacc22, 2))
   241  
   242  	// nwmax is the largest possible deflation window for which there is
   243  	// sufficient workspace.
   244  	nwmax := min((n-1)/3, lwork/2)
   245  	nw := nwmax // Start with maximum deflation window size.
   246  
   247  	// nsmax is the largest number of simultaneous shifts for which there is
   248  	// sufficient workspace.
   249  	nsmax := min((n+6)/9, 2*lwork/3) &^ 1
   250  
   251  	ndfl := 1 // Number of iterations since last deflation.
   252  	ndec := 0 // Deflation window size decrement.
   253  
   254  	// Main loop.
   255  	var (
   256  		itmax = max(30, 2*kexsh) * max(10, (ihi-ilo+1))
   257  		it    = 0
   258  	)
   259  	for kbot := ihi; kbot >= ilo; {
   260  		if it == itmax {
   261  			unconverged = kbot + 1
   262  			break
   263  		}
   264  		it++
   265  
   266  		// Locate active block.
   267  		ktop := ilo
   268  		for k := kbot; k >= ilo+1; k-- {
   269  			if h[k*ldh+k-1] == 0 {
   270  				ktop = k
   271  				break
   272  			}
   273  		}
   274  
   275  		// Select deflation window size nw.
   276  		//
   277  		// Typical Case:
   278  		//  If possible and advisable, nibble the entire active block.
   279  		//  If not, use size min(nwr,nwmax) or min(nwr+1,nwmax)
   280  		//  depending upon which has the smaller corresponding
   281  		//  subdiagonal entry (a heuristic).
   282  		//
   283  		// Exceptional Case:
   284  		//  If there have been no deflations in kexnw or more
   285  		//  iterations, then vary the deflation window size. At first,
   286  		//  because larger windows are, in general, more powerful than
   287  		//  smaller ones, rapidly increase the window to the maximum
   288  		//  possible. Then, gradually reduce the window size.
   289  		nh := kbot - ktop + 1
   290  		nwupbd := min(nh, nwmax)
   291  		if ndfl < kexnw {
   292  			nw = min(nwupbd, nwr)
   293  		} else {
   294  			nw = min(nwupbd, 2*nw)
   295  		}
   296  		if nw < nwmax {
   297  			if nw >= nh-1 {
   298  				nw = nh
   299  			} else {
   300  				kwtop := kbot - nw + 1
   301  				if math.Abs(h[kwtop*ldh+kwtop-1]) > math.Abs(h[(kwtop-1)*ldh+kwtop-2]) {
   302  					nw++
   303  				}
   304  			}
   305  		}
   306  		if ndfl < kexnw {
   307  			ndec = -1
   308  		} else if ndec >= 0 || nw >= nwupbd {
   309  			ndec++
   310  			if nw-ndec < 2 {
   311  				ndec = 0
   312  			}
   313  			nw -= ndec
   314  		}
   315  
   316  		// Split workspace under the subdiagonal of H into:
   317  		//  - an nw×nw work array V in the lower left-hand corner,
   318  		//  - an nw×nhv horizontal work array along the bottom edge (nhv
   319  		//    must be at least nw but more is better),
   320  		//  - an nve×nw vertical work array along the left-hand-edge
   321  		//    (nhv can be any positive integer but more is better).
   322  		kv := n - nw
   323  		kt := nw
   324  		kwv := nw + 1
   325  		nhv := n - kwv - kt
   326  		// Aggressive early deflation.
   327  		ls, ld := impl.Dlaqr23(wantt, wantz, n, ktop, kbot, nw,
   328  			h, ldh, iloz, ihiz, z, ldz, wr[:kbot+1], wi[:kbot+1],
   329  			h[kv*ldh:], ldh, nhv, h[kv*ldh+kt:], ldh, nhv, h[kwv*ldh:], ldh, work, lwork, recur)
   330  
   331  		// Adjust kbot accounting for new deflations.
   332  		kbot -= ld
   333  		// ks points to the shifts.
   334  		ks := kbot - ls + 1
   335  
   336  		// Skip an expensive QR sweep if there is a (partly heuristic)
   337  		// reason to expect that many eigenvalues will deflate without
   338  		// it. Here, the QR sweep is skipped if many eigenvalues have
   339  		// just been deflated or if the remaining active block is small.
   340  		if ld > 0 && (100*ld > nw*nibble || kbot-ktop+1 <= min(nmin, nwmax)) {
   341  			// ld is positive, note progress.
   342  			ndfl = 1
   343  			continue
   344  		}
   345  
   346  		// ns is the nominal number of simultaneous shifts. This may be
   347  		// lowered (slightly) if Dlaqr23 did not provide that many
   348  		// shifts.
   349  		ns := min(min(nsmax, nsr), max(2, kbot-ktop)) &^ 1
   350  
   351  		// If there have been no deflations in a multiple of kexsh
   352  		// iterations, then try exceptional shifts. Otherwise use shifts
   353  		// provided by Dlaqr23 above or from the eigenvalues of a
   354  		// trailing principal submatrix.
   355  		if ndfl%kexsh == 0 {
   356  			ks = kbot - ns + 1
   357  			for i := kbot; i > max(ks, ktop+1); i -= 2 {
   358  				ss := math.Abs(h[i*ldh+i-1]) + math.Abs(h[(i-1)*ldh+i-2])
   359  				aa := wilk1*ss + h[i*ldh+i]
   360  				_, _, _, _, wr[i-1], wi[i-1], wr[i], wi[i], _, _ =
   361  					impl.Dlanv2(aa, ss, wilk2*ss, aa)
   362  			}
   363  			if ks == ktop {
   364  				wr[ks+1] = h[(ks+1)*ldh+ks+1]
   365  				wi[ks+1] = 0
   366  				wr[ks] = wr[ks+1]
   367  				wi[ks] = wi[ks+1]
   368  			}
   369  		} else {
   370  			// If we got ns/2 or fewer shifts, use Dlahqr or recur
   371  			// into Dlaqr04 on a trailing principal submatrix to get
   372  			// more. Since ns <= nsmax <=(n+6)/9, there is enough
   373  			// space below the subdiagonal to fit an ns×ns scratch
   374  			// array.
   375  			if kbot-ks+1 <= ns/2 {
   376  				ks = kbot - ns + 1
   377  				kt = n - ns
   378  				impl.Dlacpy(blas.All, ns, ns, h[ks*ldh+ks:], ldh, h[kt*ldh:], ldh)
   379  				if ns > nmin && recur > 0 {
   380  					ks += impl.Dlaqr04(false, false, ns, 1, ns-1, h[kt*ldh:], ldh,
   381  						wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0, work, lwork, recur-1)
   382  				} else {
   383  					ks += impl.Dlahqr(false, false, ns, 0, ns-1, h[kt*ldh:], ldh,
   384  						wr[ks:ks+ns], wi[ks:ks+ns], 0, 0, nil, 0)
   385  				}
   386  				// In case of a rare QR failure use eigenvalues
   387  				// of the trailing 2×2 principal submatrix.
   388  				if ks >= kbot {
   389  					aa := h[(kbot-1)*ldh+kbot-1]
   390  					bb := h[(kbot-1)*ldh+kbot]
   391  					cc := h[kbot*ldh+kbot-1]
   392  					dd := h[kbot*ldh+kbot]
   393  					_, _, _, _, wr[kbot-1], wi[kbot-1], wr[kbot], wi[kbot], _, _ =
   394  						impl.Dlanv2(aa, bb, cc, dd)
   395  					ks = kbot - 1
   396  				}
   397  			}
   398  
   399  			if kbot-ks+1 > ns {
   400  				// Sorting the shifts helps a little. Bubble
   401  				// sort keeps complex conjugate pairs together.
   402  				sorted := false
   403  				for k := kbot; k > ks; k-- {
   404  					if sorted {
   405  						break
   406  					}
   407  					sorted = true
   408  					for i := ks; i < k; i++ {
   409  						if math.Abs(wr[i])+math.Abs(wi[i]) >= math.Abs(wr[i+1])+math.Abs(wi[i+1]) {
   410  							continue
   411  						}
   412  						sorted = false
   413  						wr[i], wr[i+1] = wr[i+1], wr[i]
   414  						wi[i], wi[i+1] = wi[i+1], wi[i]
   415  					}
   416  				}
   417  			}
   418  
   419  			// Shuffle shifts into pairs of real shifts and pairs of
   420  			// complex conjugate shifts using the fact that complex
   421  			// conjugate shifts are already adjacent to one another.
   422  			// TODO(vladimir-ch): The shuffling here could probably
   423  			// be removed but I'm not sure right now and it's safer
   424  			// to leave it.
   425  			for i := kbot; i > ks+1; i -= 2 {
   426  				if wi[i] == -wi[i-1] {
   427  					continue
   428  				}
   429  				wr[i], wr[i-1], wr[i-2] = wr[i-1], wr[i-2], wr[i]
   430  				wi[i], wi[i-1], wi[i-2] = wi[i-1], wi[i-2], wi[i]
   431  			}
   432  		}
   433  
   434  		// If there are only two shifts and both are real, then use only one.
   435  		if kbot-ks+1 == 2 && wi[kbot] == 0 {
   436  			if math.Abs(wr[kbot]-h[kbot*ldh+kbot]) < math.Abs(wr[kbot-1]-h[kbot*ldh+kbot]) {
   437  				wr[kbot-1] = wr[kbot]
   438  			} else {
   439  				wr[kbot] = wr[kbot-1]
   440  			}
   441  		}
   442  
   443  		// Use up to ns of the the smallest magnitude shifts. If there
   444  		// aren't ns shifts available, then use them all, possibly
   445  		// dropping one to make the number of shifts even.
   446  		ns = min(ns, kbot-ks+1) &^ 1
   447  		ks = kbot - ns + 1
   448  
   449  		// Split workspace under the subdiagonal into:
   450  		// - a kdu×kdu work array U in the lower left-hand-corner,
   451  		// - a kdu×nhv horizontal work array WH along the bottom edge
   452  		//   (nhv must be at least kdu but more is better),
   453  		// - an nhv×kdu vertical work array WV along the left-hand-edge
   454  		//   (nhv must be at least kdu but more is better).
   455  		kdu := 3*ns - 3
   456  		ku := n - kdu
   457  		kwh := kdu
   458  		kwv = kdu + 3
   459  		nhv = n - kwv - kdu
   460  		// Small-bulge multi-shift QR sweep.
   461  		impl.Dlaqr5(wantt, wantz, kacc22, n, ktop, kbot, ns,
   462  			wr[ks:ks+ns], wi[ks:ks+ns], h, ldh, iloz, ihiz, z, ldz,
   463  			work, 3, h[ku*ldh:], ldh, nhv, h[kwv*ldh:], ldh, nhv, h[ku*ldh+kwh:], ldh)
   464  
   465  		// Note progress (or the lack of it).
   466  		if ld > 0 {
   467  			ndfl = 1
   468  		} else {
   469  			ndfl++
   470  		}
   471  	}
   472  
   473  	work[0] = float64(lwkopt)
   474  	return unconverged
   475  }