gonum.org/v1/gonum@v0.14.0/lapack/gonum/dgehrd.go

gonum.org/v1/gonum@v0.14.0/lapack/gonum/dgehrd.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"gonum.org/v1/gonum/blas"
     9  	"gonum.org/v1/gonum/blas/blas64"
    10  	"gonum.org/v1/gonum/lapack"
    11  )
    12  
    13  // Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg
    14  // form H by an orthogonal similarity transformation Qᵀ * A * Q = H.
    15  //
    16  // The matrix Q is represented as a product of (ihi-ilo) elementary
    17  // reflectors
    18  //
    19  //	Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
    20  //
    21  // Each H_i has the form
    22  //
    23  //	H_i = I - tau[i] * v * vᵀ
    24  //
    25  // where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0.
    26  // v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i].
    27  //
    28  // On entry, a contains the n×n general matrix to be reduced. On return, the
    29  // upper triangle and the first subdiagonal of A will be overwritten with the
    30  // upper Hessenberg matrix H, and the elements below the first subdiagonal, with
    31  // the slice tau, represent the orthogonal matrix Q as a product of elementary
    32  // reflectors.
    33  //
    34  // The contents of a are illustrated by the following example, with n = 7, ilo =
    35  // 1 and ihi = 5.
    36  // On entry,
    37  //
    38  //	[ a   a   a   a   a   a   a ]
    39  //	[     a   a   a   a   a   a ]
    40  //	[     a   a   a   a   a   a ]
    41  //	[     a   a   a   a   a   a ]
    42  //	[     a   a   a   a   a   a ]
    43  //	[     a   a   a   a   a   a ]
    44  //	[                         a ]
    45  //
    46  // on return,
    47  //
    48  //	[ a   a   h   h   h   h   a ]
    49  //	[     a   h   h   h   h   a ]
    50  //	[     h   h   h   h   h   h ]
    51  //	[     v1  h   h   h   h   h ]
    52  //	[     v1  v2  h   h   h   h ]
    53  //	[     v1  v2  v3  h   h   h ]
    54  //	[                         a ]
    55  //
    56  // where a denotes an element of the original matrix A, h denotes a
    57  // modified element of the upper Hessenberg matrix H, and vi denotes an
    58  // element of the vector defining H_i.
    59  //
    60  // ilo and ihi determine the block of A that will be reduced to upper Hessenberg
    61  // form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi ==
    62  // -1 if n == 0, otherwise Dgehrd will panic.
    63  //
    64  // On return, tau will contain the scalar factors of the elementary reflectors.
    65  // Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length
    66  // equal to n-1 if n > 0, otherwise Dgehrd will panic.
    67  //
    68  // work must have length at least lwork and lwork must be at least max(1,n),
    69  // otherwise Dgehrd will panic. On return, work[0] contains the optimal value of
    70  // lwork.
    71  //
    72  // If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork
    73  // will be stored in work[0].
    74  //
    75  // Dgehrd is an internal routine. It is exported for testing purposes.
    76  func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
    77  	switch {
    78  	case n < 0:
    79  		panic(nLT0)
    80  	case ilo < 0 || max(0, n-1) < ilo:
    81  		panic(badIlo)
    82  	case ihi < min(ilo, n-1) || n <= ihi:
    83  		panic(badIhi)
    84  	case lda < max(1, n):
    85  		panic(badLdA)
    86  	case lwork < max(1, n) && lwork != -1:
    87  		panic(badLWork)
    88  	case len(work) < lwork:
    89  		panic(shortWork)
    90  	}
    91  
    92  	// Quick return if possible.
    93  	if n == 0 {
    94  		work[0] = 1
    95  		return
    96  	}
    97  
    98  	const (
    99  		nbmax = 64
   100  		ldt   = nbmax + 1
   101  		tsize = ldt * nbmax
   102  	)
   103  	// Compute the workspace requirements.
   104  	nb := min(nbmax, impl.Ilaenv(1, "DGEHRD", " ", n, ilo, ihi, -1))
   105  	lwkopt := n*nb + tsize
   106  	if lwork == -1 {
   107  		work[0] = float64(lwkopt)
   108  		return
   109  	}
   110  
   111  	if len(a) < (n-1)*lda+n {
   112  		panic(shortA)
   113  	}
   114  	if len(tau) != n-1 {
   115  		panic(badLenTau)
   116  	}
   117  
   118  	// Set tau[:ilo] and tau[ihi:] to zero.
   119  	for i := 0; i < ilo; i++ {
   120  		tau[i] = 0
   121  	}
   122  	for i := ihi; i < n-1; i++ {
   123  		tau[i] = 0
   124  	}
   125  
   126  	// Quick return if possible.
   127  	nh := ihi - ilo + 1
   128  	if nh <= 1 {
   129  		work[0] = 1
   130  		return
   131  	}
   132  
   133  	// Determine the block size.
   134  	nbmin := 2
   135  	var nx int
   136  	if 1 < nb && nb < nh {
   137  		// Determine when to cross over from blocked to unblocked code
   138  		// (last block is always handled by unblocked code).
   139  		nx = max(nb, impl.Ilaenv(3, "DGEHRD", " ", n, ilo, ihi, -1))
   140  		if nx < nh {
   141  			// Determine if workspace is large enough for blocked code.
   142  			if lwork < n*nb+tsize {
   143  				// Not enough workspace to use optimal nb:
   144  				// determine the minimum value of nb, and reduce
   145  				// nb or force use of unblocked code.
   146  				nbmin = max(2, impl.Ilaenv(2, "DGEHRD", " ", n, ilo, ihi, -1))
   147  				if lwork >= n*nbmin+tsize {
   148  					nb = (lwork - tsize) / n
   149  				} else {
   150  					nb = 1
   151  				}
   152  			}
   153  		}
   154  	}
   155  	ldwork := nb // work is used as an n×nb matrix.
   156  
   157  	var i int
   158  	if nb < nbmin || nh <= nb {
   159  		// Use unblocked code below.
   160  		i = ilo
   161  	} else {
   162  		// Use blocked code.
   163  		bi := blas64.Implementation()
   164  		iwt := n * nb // Size of the matrix Y and index where the matrix T starts in work.
   165  		for i = ilo; i < ihi-nx; i += nb {
   166  			ib := min(nb, ihi-i)
   167  
   168  			// Reduce columns [i:i+ib] to Hessenberg form, returning the
   169  			// matrices V and T of the block reflector H = I - V*T*Vᵀ
   170  			// which performs the reduction, and also the matrix Y = A*V*T.
   171  			impl.Dlahr2(ihi+1, i+1, ib, a[i:], lda, tau[i:], work[iwt:], ldt, work, ldwork)
   172  
   173  			// Apply the block reflector H to A[:ihi+1,i+ib:ihi+1] from the
   174  			// right, computing  A := A - Y * Vᵀ. V[i+ib,i+ib-1] must be set
   175  			// to 1.
   176  			ei := a[(i+ib)*lda+i+ib-1]
   177  			a[(i+ib)*lda+i+ib-1] = 1
   178  			bi.Dgemm(blas.NoTrans, blas.Trans, ihi+1, ihi-i-ib+1, ib,
   179  				-1, work, ldwork,
   180  				a[(i+ib)*lda+i:], lda,
   181  				1, a[i+ib:], lda)
   182  			a[(i+ib)*lda+i+ib-1] = ei
   183  
   184  			// Apply the block reflector H to A[0:i+1,i+1:i+ib-1] from the
   185  			// right.
   186  			bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, i+1, ib-1,
   187  				1, a[(i+1)*lda+i:], lda, work, ldwork)
   188  			for j := 0; j <= ib-2; j++ {
   189  				bi.Daxpy(i+1, -1, work[j:], ldwork, a[i+j+1:], lda)
   190  			}
   191  
   192  			// Apply the block reflector H to A[i+1:ihi+1,i+ib:n] from the
   193  			// left.
   194  			impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
   195  				ihi-i, n-i-ib, ib,
   196  				a[(i+1)*lda+i:], lda, work[iwt:], ldt, a[(i+1)*lda+i+ib:], lda, work, ldwork)
   197  		}
   198  	}
   199  	// Use unblocked code to reduce the rest of the matrix.
   200  	impl.Dgehd2(n, i, ihi, a, lda, tau, work)
   201  	work[0] = float64(lwkopt)
   202  }