github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dorglq.go (about)

     1  // Copyright ©2015 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"github.com/jingcheng-WU/gonum/blas"
     9  	"github.com/jingcheng-WU/gonum/lapack"
    10  )
    11  
    12  // Dorglq generates an m×n matrix Q with orthonormal columns defined by the
    13  // product of elementary reflectors as computed by Dgelqf.
    14  //  Q = H_0 * H_1 * ... * H_{k-1}
    15  // Dorglq is the blocked version of Dorgl2 that makes greater use of level-3 BLAS
    16  // routines.
    17  //
    18  // len(tau) >= k, 0 <= k <= m, and 0 <= m <= n.
    19  //
    20  // work is temporary storage, and lwork specifies the usable memory length. At minimum,
    21  // lwork >= m, and the amount of blocking is limited by the usable length.
    22  // If lwork == -1, instead of computing Dorglq the optimal work length is stored
    23  // into work[0].
    24  //
    25  // Dorglq will panic if the conditions on input values are not met.
    26  //
    27  // Dorglq is an internal routine. It is exported for testing purposes.
    28  func (impl Implementation) Dorglq(m, n, k int, a []float64, lda int, tau, work []float64, lwork int) {
    29  	switch {
    30  	case m < 0:
    31  		panic(mLT0)
    32  	case n < m:
    33  		panic(nLTM)
    34  	case k < 0:
    35  		panic(kLT0)
    36  	case k > m:
    37  		panic(kGTM)
    38  	case lda < max(1, n):
    39  		panic(badLdA)
    40  	case lwork < max(1, m) && lwork != -1:
    41  		panic(badLWork)
    42  	case len(work) < max(1, lwork):
    43  		panic(shortWork)
    44  	}
    45  
    46  	if m == 0 {
    47  		work[0] = 1
    48  		return
    49  	}
    50  
    51  	nb := impl.Ilaenv(1, "DORGLQ", " ", m, n, k, -1)
    52  	if lwork == -1 {
    53  		work[0] = float64(m * nb)
    54  		return
    55  	}
    56  
    57  	switch {
    58  	case len(a) < (m-1)*lda+n:
    59  		panic(shortA)
    60  	case len(tau) < k:
    61  		panic(shortTau)
    62  	}
    63  
    64  	nbmin := 2 // Minimum block size
    65  	var nx int // Crossover size from blocked to unbloked code
    66  	iws := m   // Length of work needed
    67  	var ldwork int
    68  	if 1 < nb && nb < k {
    69  		nx = max(0, impl.Ilaenv(3, "DORGLQ", " ", m, n, k, -1))
    70  		if nx < k {
    71  			ldwork = nb
    72  			iws = m * ldwork
    73  			if lwork < iws {
    74  				nb = lwork / m
    75  				ldwork = nb
    76  				nbmin = max(2, impl.Ilaenv(2, "DORGLQ", " ", m, n, k, -1))
    77  			}
    78  		}
    79  	}
    80  
    81  	var ki, kk int
    82  	if nbmin <= nb && nb < k && nx < k {
    83  		// The first kk rows are handled by the blocked method.
    84  		ki = ((k - nx - 1) / nb) * nb
    85  		kk = min(k, ki+nb)
    86  		for i := kk; i < m; i++ {
    87  			for j := 0; j < kk; j++ {
    88  				a[i*lda+j] = 0
    89  			}
    90  		}
    91  	}
    92  	if kk < m {
    93  		// Perform the operation on colums kk to the end.
    94  		impl.Dorgl2(m-kk, n-kk, k-kk, a[kk*lda+kk:], lda, tau[kk:], work)
    95  	}
    96  	if kk > 0 {
    97  		// Perform the operation on column-blocks
    98  		for i := ki; i >= 0; i -= nb {
    99  			ib := min(nb, k-i)
   100  			if i+ib < m {
   101  				impl.Dlarft(lapack.Forward, lapack.RowWise,
   102  					n-i, ib,
   103  					a[i*lda+i:], lda,
   104  					tau[i:],
   105  					work, ldwork)
   106  
   107  				impl.Dlarfb(blas.Right, blas.Trans, lapack.Forward, lapack.RowWise,
   108  					m-i-ib, n-i, ib,
   109  					a[i*lda+i:], lda,
   110  					work, ldwork,
   111  					a[(i+ib)*lda+i:], lda,
   112  					work[ib*ldwork:], ldwork)
   113  			}
   114  			impl.Dorgl2(ib, n-i, ib, a[i*lda+i:], lda, tau[i:], work)
   115  			for l := i; l < i+ib; l++ {
   116  				for j := 0; j < i; j++ {
   117  					a[l*lda+j] = 0
   118  				}
   119  			}
   120  		}
   121  	}
   122  	work[0] = float64(iws)
   123  }