gonum.org/v1/gonum@v0.14.0/lapack/gonum/dgeqrf.go

gonum.org/v1/gonum@v0.14.0/lapack/gonum/dgeqrf.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"gonum.org/v1/gonum/blas"
     9  	"gonum.org/v1/gonum/lapack"
    10  )
    11  
    12  // Dgeqrf computes the QR factorization of the m×n matrix A using a blocked
    13  // algorithm. See the documentation for Dgeqr2 for a description of the
    14  // parameters at entry and exit.
    15  //
    16  // work is temporary storage, and lwork specifies the usable memory length.
    17  // The length of work must be at least max(1, lwork) and lwork must be -1
    18  // or at least n, otherwise this function will panic.
    19  // Dgeqrf is a blocked QR factorization, but the block size is limited
    20  // by the temporary space available. If lwork == -1, instead of performing Dgeqrf,
    21  // the optimal work length will be stored into work[0].
    22  //
    23  // tau must have length at least min(m,n), and this function will panic otherwise.
    24  func (impl Implementation) Dgeqrf(m, n int, a []float64, lda int, tau, work []float64, lwork int) {
    25  	switch {
    26  	case m < 0:
    27  		panic(mLT0)
    28  	case n < 0:
    29  		panic(nLT0)
    30  	case lda < max(1, n):
    31  		panic(badLdA)
    32  	case lwork < max(1, n) && lwork != -1:
    33  		panic(badLWork)
    34  	case len(work) < max(1, lwork):
    35  		panic(shortWork)
    36  	}
    37  
    38  	// Quick return if possible.
    39  	k := min(m, n)
    40  	if k == 0 {
    41  		work[0] = 1
    42  		return
    43  	}
    44  
    45  	// nb is the optimal blocksize, i.e. the number of columns transformed at a time.
    46  	nb := impl.Ilaenv(1, "DGEQRF", " ", m, n, -1, -1)
    47  	if lwork == -1 {
    48  		work[0] = float64(n * nb)
    49  		return
    50  	}
    51  
    52  	if len(a) < (m-1)*lda+n {
    53  		panic(shortA)
    54  	}
    55  	if len(tau) < k {
    56  		panic(shortTau)
    57  	}
    58  
    59  	nbmin := 2 // Minimal block size.
    60  	var nx int // Use unblocked (unless changed in the next for loop)
    61  	iws := n
    62  	// Only consider blocked if the suggested block size is > 1 and the
    63  	// number of rows or columns is sufficiently large.
    64  	if 1 < nb && nb < k {
    65  		// nx is the block size at which the code switches from blocked
    66  		// to unblocked.
    67  		nx = max(0, impl.Ilaenv(3, "DGEQRF", " ", m, n, -1, -1))
    68  		if k > nx {
    69  			iws = n * nb
    70  			if lwork < iws {
    71  				// Not enough workspace to use the optimal block
    72  				// size. Get the minimum block size instead.
    73  				nb = lwork / n
    74  				nbmin = max(2, impl.Ilaenv(2, "DGEQRF", " ", m, n, -1, -1))
    75  			}
    76  		}
    77  	}
    78  
    79  	// Compute QR using a blocked algorithm.
    80  	var i int
    81  	if nbmin <= nb && nb < k && nx < k {
    82  		ldwork := nb
    83  		for i = 0; i < k-nx; i += nb {
    84  			ib := min(k-i, nb)
    85  			// Compute the QR factorization of the current block.
    86  			impl.Dgeqr2(m-i, ib, a[i*lda+i:], lda, tau[i:], work)
    87  			if i+ib < n {
    88  				// Form the triangular factor of the block reflector and apply Hᵀ
    89  				// In Dlarft, work becomes the T matrix.
    90  				impl.Dlarft(lapack.Forward, lapack.ColumnWise, m-i, ib,
    91  					a[i*lda+i:], lda,
    92  					tau[i:],
    93  					work, ldwork)
    94  				impl.Dlarfb(blas.Left, blas.Trans, lapack.Forward, lapack.ColumnWise,
    95  					m-i, n-i-ib, ib,
    96  					a[i*lda+i:], lda,
    97  					work, ldwork,
    98  					a[i*lda+i+ib:], lda,
    99  					work[ib*ldwork:], ldwork)
   100  			}
   101  		}
   102  	}
   103  	// Call unblocked code on the remaining columns.
   104  	if i < k {
   105  		impl.Dgeqr2(m-i, n-i, a[i*lda+i:], lda, tau[i:], work)
   106  	}
   107  	work[0] = float64(iws)
   108  }