github.com/gopherd/gonum@v0.0.4/lapack/gonum/dgehrd.go

github.com/gopherd/gonum@v0.0.4/lapack/gonum/dgehrd.go (about)

     1  // Copyright ©2016 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"github.com/gopherd/gonum/blas"
     9  	"github.com/gopherd/gonum/blas/blas64"
    10  	"github.com/gopherd/gonum/lapack"
    11  )
    12  
    13  // Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg
    14  // form H by an orthogonal similarity transformation Qᵀ * A * Q = H.
    15  //
    16  // The matrix Q is represented as a product of (ihi-ilo) elementary
    17  // reflectors
    18  //  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
    19  // Each H_i has the form
    20  //  H_i = I - tau[i] * v * vᵀ
    21  // where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0.
    22  // v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i].
    23  //
    24  // On entry, a contains the n×n general matrix to be reduced. On return, the
    25  // upper triangle and the first subdiagonal of A will be overwritten with the
    26  // upper Hessenberg matrix H, and the elements below the first subdiagonal, with
    27  // the slice tau, represent the orthogonal matrix Q as a product of elementary
    28  // reflectors.
    29  //
    30  // The contents of a are illustrated by the following example, with n = 7, ilo =
    31  // 1 and ihi = 5.
    32  // On entry,
    33  //  [ a   a   a   a   a   a   a ]
    34  //  [     a   a   a   a   a   a ]
    35  //  [     a   a   a   a   a   a ]
    36  //  [     a   a   a   a   a   a ]
    37  //  [     a   a   a   a   a   a ]
    38  //  [     a   a   a   a   a   a ]
    39  //  [                         a ]
    40  // on return,
    41  //  [ a   a   h   h   h   h   a ]
    42  //  [     a   h   h   h   h   a ]
    43  //  [     h   h   h   h   h   h ]
    44  //  [     v1  h   h   h   h   h ]
    45  //  [     v1  v2  h   h   h   h ]
    46  //  [     v1  v2  v3  h   h   h ]
    47  //  [                         a ]
    48  // where a denotes an element of the original matrix A, h denotes a
    49  // modified element of the upper Hessenberg matrix H, and vi denotes an
    50  // element of the vector defining H_i.
    51  //
    52  // ilo and ihi determine the block of A that will be reduced to upper Hessenberg
    53  // form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi ==
    54  // -1 if n == 0, otherwise Dgehrd will panic.
    55  //
    56  // On return, tau will contain the scalar factors of the elementary reflectors.
    57  // Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length
    58  // equal to n-1 if n > 0, otherwise Dgehrd will panic.
    59  //
    60  // work must have length at least lwork and lwork must be at least max(1,n),
    61  // otherwise Dgehrd will panic. On return, work[0] contains the optimal value of
    62  // lwork.
    63  //
    64  // If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork
    65  // will be stored in work[0].
    66  //
    67  // Dgehrd is an internal routine. It is exported for testing purposes.
    68  func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
    69  	switch {
    70  	case n < 0:
    71  		panic(nLT0)
    72  	case ilo < 0 || max(0, n-1) < ilo:
    73  		panic(badIlo)
    74  	case ihi < min(ilo, n-1) || n <= ihi:
    75  		panic(badIhi)
    76  	case lda < max(1, n):
    77  		panic(badLdA)
    78  	case lwork < max(1, n) && lwork != -1:
    79  		panic(badLWork)
    80  	case len(work) < lwork:
    81  		panic(shortWork)
    82  	}
    83  
    84  	// Quick return if possible.
    85  	if n == 0 {
    86  		work[0] = 1
    87  		return
    88  	}
    89  
    90  	const (
    91  		nbmax = 64
    92  		ldt   = nbmax + 1
    93  		tsize = ldt * nbmax
    94  	)
    95  	// Compute the workspace requirements.
    96  	nb := min(nbmax, impl.Ilaenv(1, "DGEHRD", " ", n, ilo, ihi, -1))
    97  	lwkopt := n*nb + tsize
    98  	if lwork == -1 {
    99  		work[0] = float64(lwkopt)
   100  		return
   101  	}
   102  
   103  	if len(a) < (n-1)*lda+n {
   104  		panic(shortA)
   105  	}
   106  	if len(tau) != n-1 {
   107  		panic(badLenTau)
   108  	}
   109  
   110  	// Set tau[:ilo] and tau[ihi:] to zero.
   111  	for i := 0; i < ilo; i++ {
   112  		tau[i] = 0
   113  	}
   114  	for i := ihi; i < n-1; i++ {
   115  		tau[i] = 0
   116  	}
   117  
   118  	// Quick return if possible.
   119  	nh := ihi - ilo + 1
   120  	if nh <= 1 {
   121  		work[0] = 1
   122  		return
   123  	}
   124  
   125  	// Determine the block size.
   126  	nbmin := 2
   127  	var nx int
   128  	if 1 < nb && nb < nh {
   129  		// Determine when to cross over from blocked to unblocked code
   130  		// (last block is always handled by unblocked code).
   131  		nx = max(nb, impl.Ilaenv(3, "DGEHRD", " ", n, ilo, ihi, -1))
   132  		if nx < nh {
   133  			// Determine if workspace is large enough for blocked code.
   134  			if lwork < n*nb+tsize {
   135  				// Not enough workspace to use optimal nb:
   136  				// determine the minimum value of nb, and reduce
   137  				// nb or force use of unblocked code.
   138  				nbmin = max(2, impl.Ilaenv(2, "DGEHRD", " ", n, ilo, ihi, -1))
   139  				if lwork >= n*nbmin+tsize {
   140  					nb = (lwork - tsize) / n
   141  				} else {
   142  					nb = 1
   143  				}
   144  			}
   145  		}
   146  	}
   147  	ldwork := nb // work is used as an n×nb matrix.
   148  
   149  	var i int
   150  	if nb < nbmin || nh <= nb {
   151  		// Use unblocked code below.
   152  		i = ilo
   153  	} else {
   154  		// Use blocked code.
   155  		bi := blas64.Implementation()
   156  		iwt := n * nb // Size of the matrix Y and index where the matrix T starts in work.
   157  		for i = ilo; i < ihi-nx; i += nb {
   158  			ib := min(nb, ihi-i)
   159  
   160  			// Reduce columns [i:i+ib] to Hessenberg form, returning the
   161  			// matrices V and T of the block reflector H = I - V*T*Vᵀ
   162  			// which performs the reduction, and also the matrix Y = A*V*T.
   163  			impl.Dlahr2(ihi+1, i+1, ib, a[i:], lda, tau[i:], work[iwt:], ldt, work, ldwork)
   164  
   165  			// Apply the block reflector H to A[:ihi+1,i+ib:ihi+1] from the
   166  			// right, computing  A := A - Y * Vᵀ. V[i+ib,i+ib-1] must be set
   167  			// to 1.
   168  			ei := a[(i+ib)*lda+i+ib-1]
   169  			a[(i+ib)*lda+i+ib-1] = 1
   170  			bi.Dgemm(blas.NoTrans, blas.Trans, ihi+1, ihi-i-ib+1, ib,
   171  				-1, work, ldwork,
   172  				a[(i+ib)*lda+i:], lda,
   173  				1, a[i+ib:], lda)
   174  			a[(i+ib)*lda+i+ib-1] = ei
   175  
   176  			// Apply the block reflector H to A[0:i+1,i+1:i+ib-1] from the
   177  			// right.
   178  			bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, i+1, ib-1,
   179  				1, a[(i+1)*lda+i:], lda, work, ldwork)
   180  			for j := 0; j <= ib-2; j++ {
   181  				bi.Daxpy(i+1, -1, work[j:], ldwork, a[i+j+1:], lda)
   182  			}
   183  
   184  			// Apply the block reflector H to A[i+1:ihi+1,i+ib:n] from the
   185  			// left.
   186  			impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
   187  				ihi-i, n-i-ib, ib,
   188  				a[(i+1)*lda+i:], lda, work[iwt:], ldt, a[(i+1)*lda+i+ib:], lda, work, ldwork)
   189  		}
   190  	}
   191  	// Use unblocked code to reduce the rest of the matrix.
   192  	impl.Dgehd2(n, i, ihi, a, lda, tau, work)
   193  	work[0] = float64(lwkopt)
   194  }