github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dorgqr.go (about)

     1  // Copyright ©2015 The gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package native
     6  
     7  import (
     8  	"github.com/gonum/blas"
     9  	"github.com/gonum/lapack"
    10  )
    11  
    12  // Dorgqr generates an m×n matrix Q with orthonormal columns defined by the
    13  // product of elementary reflectors
    14  //  Q = H_0 * H_1 * ... * H_{k-1}
    15  // as computed by Dgeqrf.
    16  // Dorgqr is the blocked version of Dorg2r that makes greater use of level-3 BLAS
    17  // routines.
    18  //
    19  // The length of tau must be at least k, and the length of work must be at least n.
    20  // It also must be that 0 <= k <= n and 0 <= n <= m.
    21  //
    22  // work is temporary storage, and lwork specifies the usable memory length. At
    23  // minimum, lwork >= n, and the amount of blocking is limited by the usable
    24  // length. If lwork == -1, instead of computing Dorgqr the optimal work length
    25  // is stored into work[0].
    26  //
    27  // Dorgqr will panic if the conditions on input values are not met.
    28  //
    29  // Dorgqr is an internal routine. It is exported for testing purposes.
    30  func (impl Implementation) Dorgqr(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
    31  	nb := impl.Ilaenv(1, "DORGQR", " ", m, n, k, -1)
    32  	// work is treated as an n×nb matrix
    33  	if lwork == -1 {
    34  		work[0] = float64(max(1, n) * nb)
    35  		return
    36  	}
    37  	checkMatrix(m, n, a, lda)
    38  	if k < 0 {
    39  		panic(kLT0)
    40  	}
    41  	if k > n {
    42  		panic(kGTN)
    43  	}
    44  	if n > m {
    45  		panic(mLTN)
    46  	}
    47  	if len(tau) < k {
    48  		panic(badTau)
    49  	}
    50  	if len(work) < lwork {
    51  		panic(shortWork)
    52  	}
    53  	if lwork < n {
    54  		panic(badWork)
    55  	}
    56  	if n == 0 {
    57  		return
    58  	}
    59  	nbmin := 2 // Minimum number of blocks
    60  	var nx int // Minimum number of rows
    61  	iws := n   // Length of work needed
    62  	var ldwork int
    63  	if nb > 1 && nb < k {
    64  		nx = max(0, impl.Ilaenv(3, "DORGQR", " ", m, n, k, -1))
    65  		if nx < k {
    66  			ldwork = nb
    67  			iws = n * ldwork
    68  			if lwork < iws {
    69  				nb = lwork / n
    70  				ldwork = nb
    71  				nbmin = max(2, impl.Ilaenv(2, "DORGQR", " ", m, n, k, -1))
    72  			}
    73  		}
    74  	}
    75  	var ki, kk int
    76  	if nb >= nbmin && nb < k && nx < k {
    77  		// The first kk columns are handled by the blocked method.
    78  		// Note: lapack has nx here, but this means the last nx rows are handled
    79  		// serially which could be quite different than nb.
    80  		ki = ((k - nb - 1) / nb) * nb
    81  		kk = min(k, ki+nb)
    82  		for j := kk; j < n; j++ {
    83  			for i := 0; i < kk; i++ {
    84  				a[i*lda+j] = 0
    85  			}
    86  		}
    87  	}
    88  	if kk < n {
    89  		// Perform the operation on colums kk to the end.
    90  		impl.Dorg2r(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
    91  	}
    92  	if kk == 0 {
    93  		return
    94  	}
    95  	// Perform the operation on column-blocks
    96  	for i := ki; i >= 0; i -= nb {
    97  		ib := min(nb, k-i)
    98  		if i+ib < n {
    99  			impl.Dlarft(lapack.Forward, lapack.ColumnWise,
   100  				m-i, ib,
   101  				a[i*lda+i:], lda,
   102  				tau[i:],
   103  				work, ldwork)
   104  
   105  			impl.Dlarfb(blas.Left, blas.NoTrans, lapack.Forward, lapack.ColumnWise,
   106  				m-i, n-i-ib, ib,
   107  				a[i*lda+i:], lda,
   108  				work, ldwork,
   109  				a[i*lda+i+ib:], lda,
   110  				work[ib*ldwork:], ldwork)
   111  		}
   112  		impl.Dorg2r(m-i, ib, ib, a[i*lda+i:], lda, tau[i:], work)
   113  		// Set rows 0:i-1 of current block to zero
   114  		for j := i; j < i+ib; j++ {
   115  			for l := 0; l < i; l++ {
   116  				a[l*lda+j] = 0
   117  			}
   118  		}
   119  	}
   120  }