gonum.org/v1/gonum@v0.15.1-0.20240517103525-f853624cb1bb/lapack/gonum/dorglq.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"gonum.org/v1/gonum/blas"
     9  	"gonum.org/v1/gonum/lapack"
    10  )
    11  
    12  // Dorglq generates an m×n matrix Q with orthonormal rows defined as the first m
    13  // rows of a product of k elementary reflectors of order n
    14  //
    15  //	Q = H_{k-1} * ... * H_0
    16  //
    17  // as returned by Dgelqf.
    18  //
    19  // On entry, tau and the first k rows of A must contain the scalar factors and
    20  // the vectors, respectively, which define the elementary reflectors H_i,
    21  // i=0,...,k-1, as returned by Dgelqf. On return, A contains the matrix Q.
    22  //
    23  // tau must have length at least k, work must have length at least lwork and
    24  // lwork must be at least max(1,m). On return, optimal value of lwork will be
    25  // stored in work[0]. It must also hold that 0 <= k <= m <= n, otherwise Dorglq
    26  // will panic.
    27  //
    28  // If lwork == -1, instead of performing Dorglq, the function only calculates
    29  // the optimal value of lwork and stores it into work[0].
    30  func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
    31  	switch {
    32  	case m < 0:
    33  		panic(mLT0)
    34  	case n < m:
    35  		panic(nLTM)
    36  	case k < 0:
    37  		panic(kLT0)
    38  	case k > m:
    39  		panic(kGTM)
    40  	case lda < max(1, n):
    41  		panic(badLdA)
    42  	case lwork < max(1, m) && lwork != -1:
    43  		panic(badLWork)
    44  	case len(work) < max(1, lwork):
    45  		panic(shortWork)
    46  	}
    47  
    48  	if m == 0 {
    49  		work[0] = 1
    50  		return
    51  	}
    52  
    53  	nb := impl.Ilaenv(1, "DORGLQ", " ", m, n, k, -1)
    54  	if lwork == -1 {
    55  		work[0] = float64(m * nb)
    56  		return
    57  	}
    58  
    59  	switch {
    60  	case len(a) < (m-1)*lda+n:
    61  		panic(shortA)
    62  	case len(tau) < k:
    63  		panic(shortTau)
    64  	}
    65  
    66  	nbmin := 2 // Minimum block size
    67  	var nx int // Crossover size from blocked to unblocked code
    68  	iws := m   // Length of work needed
    69  	var ldwork int
    70  	if 1 < nb && nb < k {
    71  		nx = max(0, impl.Ilaenv(3, "DORGLQ", " ", m, n, k, -1))
    72  		if nx < k {
    73  			ldwork = nb
    74  			iws = m * ldwork
    75  			if lwork < iws {
    76  				nb = lwork / m
    77  				ldwork = nb
    78  				nbmin = max(2, impl.Ilaenv(2, "DORGLQ", " ", m, n, k, -1))
    79  			}
    80  		}
    81  	}
    82  
    83  	var ki, kk int
    84  	if nbmin <= nb && nb < k && nx < k {
    85  		// The first kk rows are handled by the blocked method.
    86  		ki = ((k - nx - 1) / nb) * nb
    87  		kk = min(k, ki+nb)
    88  		for i := kk; i < m; i++ {
    89  			for j := 0; j < kk; j++ {
    90  				a[i*lda+j] = 0
    91  			}
    92  		}
    93  	}
    94  	if kk < m {
    95  		// Perform the operation on columns kk to the end.
    96  		impl.Dorgl2(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
    97  	}
    98  	if kk > 0 {
    99  		// Perform the operation on column-blocks
   100  		for i := ki; i >= 0; i -= nb {
   101  			ib := min(nb, k-i)
   102  			if i+ib < m {
   103  				impl.Dlarft(lapack.Forward, lapack.RowWise,
   104  					n-i, ib,
   105  					a[i*lda+i:], lda,
   106  					tau[i:],
   107  					work, ldwork)
   108  
   109  				impl.Dlarfb(blas.Right, blas.Trans, lapack.Forward, lapack.RowWise,
   110  					m-i-ib, n-i, ib,
   111  					a[i*lda+i:], lda,
   112  					work, ldwork,
   113  					a[(i+ib)*lda+i:], lda,
   114  					work[ib*ldwork:], ldwork)
   115  			}
   116  			impl.Dorgl2(ib, n-i, ib, a[i*lda+i:], lda, tau[i:], work)
   117  			for l := i; l < i+ib; l++ {
   118  				for j := 0; j < i; j++ {
   119  					a[l*lda+j] = 0
   120  				}
   121  			}
   122  		}
   123  	}
   124  	work[0] = float64(iws)
   125  }