github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dgehrd.go (about)

     1  // Copyright ©2016 The gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package native
     6  
     7  import (
     8  	"github.com/gonum/blas"
     9  	"github.com/gonum/blas/blas64"
    10  	"github.com/gonum/lapack"
    11  )
    12  
    13  // Dgehrd reduces a block of a real n×n general matrix A to upper Hessenberg
    14  // form H by an orthogonal similarity transformation Q^T * A * Q = H.
    15  //
    16  // The matrix Q is represented as a product of (ihi-ilo) elementary
    17  // reflectors
    18  //  Q = H_{ilo} H_{ilo+1} ... H_{ihi-1}.
    19  // Each H_i has the form
    20  //  H_i = I - tau[i] * v * v^T
    21  // where v is a real vector with v[0:i+1] = 0, v[i+1] = 1 and v[ihi+1:n] = 0.
    22  // v[i+2:ihi+1] is stored on exit in A[i+2:ihi+1,i].
    23  //
    24  // On entry, a contains the n×n general matrix to be reduced. On return, the
    25  // upper triangle and the first subdiagonal of A will be overwritten with the
    26  // upper Hessenberg matrix H, and the elements below the first subdiagonal, with
    27  // the slice tau, represent the orthogonal matrix Q as a product of elementary
    28  // reflectors.
    29  //
    30  // The contents of a are illustrated by the following example, with n = 7, ilo =
    31  // 1 and ihi = 5.
    32  // On entry,
    33  //  [ a   a   a   a   a   a   a ]
    34  //  [     a   a   a   a   a   a ]
    35  //  [     a   a   a   a   a   a ]
    36  //  [     a   a   a   a   a   a ]
    37  //  [     a   a   a   a   a   a ]
    38  //  [     a   a   a   a   a   a ]
    39  //  [                         a ]
    40  // on return,
    41  //  [ a   a   h   h   h   h   a ]
    42  //  [     a   h   h   h   h   a ]
    43  //  [     h   h   h   h   h   h ]
    44  //  [     v1  h   h   h   h   h ]
    45  //  [     v1  v2  h   h   h   h ]
    46  //  [     v1  v2  v3  h   h   h ]
    47  //  [                         a ]
    48  // where a denotes an element of the original matrix A, h denotes a
    49  // modified element of the upper Hessenberg matrix H, and vi denotes an
    50  // element of the vector defining H_i.
    51  //
    52  // ilo and ihi determine the block of A that will be reduced to upper Hessenberg
    53  // form. It must hold that 0 <= ilo <= ihi < n if n > 0, and ilo == 0 and ihi ==
    54  // -1 if n == 0, otherwise Dgehrd will panic.
    55  //
    56  // On return, tau will contain the scalar factors of the elementary reflectors.
    57  // Elements tau[:ilo] and tau[ihi:] will be set to zero. tau must have length
    58  // equal to n-1 if n > 0, otherwise Dgehrd will panic.
    59  //
    60  // work must have length at least lwork and lwork must be at least max(1,n),
    61  // otherwise Dgehrd will panic. On return, work[0] contains the optimal value of
    62  // lwork.
    63  //
    64  // If lwork == -1, instead of performing Dgehrd, only the optimal value of lwork
    65  // will be stored in work[0].
    66  //
    67  // Dgehrd is an internal routine. It is exported for testing purposes.
    68  func (impl Implementation) Dgehrd(n, ilo, ihi int, a []float64, lda int, tau, work []float64, lwork int) {
    69  	switch {
    70  	case ilo < 0 || max(0, n-1) < ilo:
    71  		panic(badIlo)
    72  	case ihi < min(ilo, n-1) || n <= ihi:
    73  		panic(badIhi)
    74  	case lwork < max(1, n) && lwork != -1:
    75  		panic(badWork)
    76  	case len(work) < lwork:
    77  		panic(shortWork)
    78  	}
    79  	if lwork != -1 {
    80  		checkMatrix(n, n, a, lda)
    81  		if len(tau) != n-1 && n > 0 {
    82  			panic(badTau)
    83  		}
    84  	}
    85  
    86  	const (
    87  		nbmax = 64
    88  		ldt   = nbmax + 1
    89  		tsize = ldt * nbmax
    90  	)
    91  	// Compute the workspace requirements.
    92  	nb := min(nbmax, impl.Ilaenv(1, "DGEHRD", " ", n, ilo, ihi, -1))
    93  	lwkopt := n*nb + tsize
    94  	if lwork == -1 {
    95  		work[0] = float64(lwkopt)
    96  		return
    97  	}
    98  
    99  	// Set tau[:ilo] and tau[ihi:] to zero.
   100  	for i := 0; i < ilo; i++ {
   101  		tau[i] = 0
   102  	}
   103  	for i := ihi; i < n-1; i++ {
   104  		tau[i] = 0
   105  	}
   106  
   107  	// Quick return if possible.
   108  	nh := ihi - ilo + 1
   109  	if nh <= 1 {
   110  		work[0] = 1
   111  		return
   112  	}
   113  
   114  	// Determine the block size.
   115  	nbmin := 2
   116  	var nx int
   117  	if 1 < nb && nb < nh {
   118  		// Determine when to cross over from blocked to unblocked code
   119  		// (last block is always handled by unblocked code).
   120  		nx = max(nb, impl.Ilaenv(3, "DGEHRD", " ", n, ilo, ihi, -1))
   121  		if nx < nh {
   122  			// Determine if workspace is large enough for blocked code.
   123  			if lwork < n*nb+tsize {
   124  				// Not enough workspace to use optimal nb:
   125  				// determine the minimum value of nb, and reduce
   126  				// nb or force use of unblocked code.
   127  				nbmin = max(2, impl.Ilaenv(2, "DGEHRD", " ", n, ilo, ihi, -1))
   128  				if lwork >= n*nbmin+tsize {
   129  					nb = (lwork - tsize) / n
   130  				} else {
   131  					nb = 1
   132  				}
   133  			}
   134  		}
   135  	}
   136  	ldwork := nb // work is used as an n×nb matrix.
   137  
   138  	var i int
   139  	if nb < nbmin || nh <= nb {
   140  		// Use unblocked code below.
   141  		i = ilo
   142  	} else {
   143  		// Use blocked code.
   144  		bi := blas64.Implementation()
   145  		iwt := n * nb // Size of the matrix Y and index where the matrix T starts in work.
   146  		for i = ilo; i < ihi-nx; i += nb {
   147  			ib := min(nb, ihi-i)
   148  
   149  			// Reduce columns [i:i+ib] to Hessenberg form, returning the
   150  			// matrices V and T of the block reflector H = I - V*T*V^T
   151  			// which performs the reduction, and also the matrix Y = A*V*T.
   152  			impl.Dlahr2(ihi+1, i+1, ib, a[i:], lda, tau[i:], work[iwt:], ldt, work, ldwork)
   153  
   154  			// Apply the block reflector H to A[:ihi+1,i+ib:ihi+1] from the
   155  			// right, computing  A := A - Y * V^T. V[i+ib,i+ib-1] must be set
   156  			// to 1.
   157  			ei := a[(i+ib)*lda+i+ib-1]
   158  			a[(i+ib)*lda+i+ib-1] = 1
   159  			bi.Dgemm(blas.NoTrans, blas.Trans, ihi+1, ihi-i-ib+1, ib,
   160  				-1, work, ldwork,
   161  				a[(i+ib)*lda+i:], lda,
   162  				1, a[i+ib:], lda)
   163  			a[(i+ib)*lda+i+ib-1] = ei
   164  
   165  			// Apply the block reflector H to A[0:i+1,i+1:i+ib-1] from the
   166  			// right.
   167  			bi.Dtrmm(blas.Right, blas.Lower, blas.Trans, blas.Unit, i+1, ib-1,
   168  				1, a[(i+1)*lda+i:], lda, work, ldwork)
   169  			for j := 0; j <= ib-2; j++ {
   170  				bi.Daxpy(i+1, -1, work[j:], ldwork, a[i+j+1:], lda)
   171  			}
   172  
   173  			// Apply the block reflector H to A[i+1:ihi+1,i+ib:n] from the
   174  			// left.
   175  			impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
   176  				ihi-i, n-i-ib, ib,
   177  				a[(i+1)*lda+i:], lda, work[iwt:], ldt, a[(i+1)*lda+i+ib:], lda, work, ldwork)
   178  		}
   179  	}
   180  	// Use unblocked code to reduce the rest of the matrix.
   181  	impl.Dgehd2(n, i, ihi, a, lda, tau, work)
   182  	work[0] = float64(lwkopt)
   183  }