gonum.org/v1/gonum@v0.15.1-0.20240517103525-f853624cb1bb/lapack/gonum/dgeqp3.go (about)

     1  // Copyright ©2017 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"gonum.org/v1/gonum/blas"
     9  	"gonum.org/v1/gonum/blas/blas64"
    10  )
    11  
    12  // Dgeqp3 computes a QR factorization with column pivoting of the m×n matrix A:
    13  //
    14  //	A*P = Q*R
    15  //
    16  // where P is a permutation matrix, Q is an orthogonal matrix and R is a
    17  // min(m,n)×n upper trapezoidal matrix.
    18  //
    19  // On return, the upper triangle of A contains the matrix R. The elements below
    20  // the diagonal together with tau represent the matrix Q as a product of
    21  // elementary reflectors
    22  //
    23  //	Q = H_0 * H_1 * ... * H_{k-1}, where k = min(m,n).
    24  //
    25  // Each H_i has the form
    26  //
    27  //	H_i = I - tau * v * vᵀ
    28  //
    29  // where tau is a scalar and v is a vector with v[0:i] = 0 and v[i] = 1;
    30  // v[i+1:m] is stored on exit in A[i+1:m,i], and tau in tau[i].
    31  //
    32  // jpvt specifies a column pivot to be applied to A. On entry, if jpvt[j] is at
    33  // least zero, the jth column of A is permuted to the front of A*P (a leading
    34  // column), if jpvt[j] is -1 the jth column of A is a free column. If jpvt[j] <
    35  // -1, Dgeqp3 will panic. On return, jpvt holds the permutation that was
    36  // applied; the jth column of A*P was the jpvt[j] column of A. jpvt must have
    37  // length n or Dgeqp3 will panic.
    38  //
    39  // tau holds the scalar factors of the elementary reflectors. It must have
    40  // length min(m,n), otherwise Dgeqp3 will panic.
    41  //
    42  // work must have length at least max(1,lwork), and lwork must be at least
    43  // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must be at
    44  // least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return, work[0]
    45  // will contain the optimal value of lwork.
    46  //
    47  // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork
    48  // will be stored in work[0].
    49  //
    50  // Dgeqp3 is an internal routine. It is exported for testing purposes.
    51  func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) {
    52  	const (
    53  		inb    = 1
    54  		inbmin = 2
    55  		ixover = 3
    56  	)
    57  
    58  	minmn := min(m, n)
    59  	iws := 3*n + 1
    60  	if minmn == 0 {
    61  		iws = 1
    62  	}
    63  	switch {
    64  	case m < 0:
    65  		panic(mLT0)
    66  	case n < 0:
    67  		panic(nLT0)
    68  	case lda < max(1, n):
    69  		panic(badLdA)
    70  	case lwork < iws && lwork != -1:
    71  		panic(badLWork)
    72  	case len(work) < max(1, lwork):
    73  		panic(shortWork)
    74  	}
    75  
    76  	// Quick return if possible.
    77  	if minmn == 0 {
    78  		work[0] = 1
    79  		return
    80  	}
    81  
    82  	nb := impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1)
    83  	if lwork == -1 {
    84  		work[0] = float64(2*n + (n+1)*nb)
    85  		return
    86  	}
    87  
    88  	switch {
    89  	case len(a) < (m-1)*lda+n:
    90  		panic(shortA)
    91  	case len(jpvt) != n:
    92  		panic(badLenJpvt)
    93  	case len(tau) < minmn:
    94  		panic(shortTau)
    95  	}
    96  
    97  	for _, v := range jpvt {
    98  		if v < -1 || n <= v {
    99  			panic(badJpvt)
   100  		}
   101  	}
   102  
   103  	bi := blas64.Implementation()
   104  
   105  	// Move initial columns up front.
   106  	var nfxd int
   107  	for j := 0; j < n; j++ {
   108  		if jpvt[j] == -1 {
   109  			jpvt[j] = j
   110  			continue
   111  		}
   112  		if j != nfxd {
   113  			bi.Dswap(m, a[j:], lda, a[nfxd:], lda)
   114  			jpvt[j], jpvt[nfxd] = jpvt[nfxd], j
   115  		} else {
   116  			jpvt[j] = j
   117  		}
   118  		nfxd++
   119  	}
   120  
   121  	// Factorize nfxd columns.
   122  	//
   123  	// Compute the QR factorization of nfxd columns and update remaining columns.
   124  	if nfxd > 0 {
   125  		na := min(m, nfxd)
   126  		impl.Dgeqrf(m, na, a, lda, tau[:na], work, lwork)
   127  		iws = max(iws, int(work[0]))
   128  		if na < n {
   129  			impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda,
   130  				work, lwork)
   131  			iws = max(iws, int(work[0]))
   132  		}
   133  	}
   134  
   135  	if nfxd >= minmn {
   136  		work[0] = float64(iws)
   137  		return
   138  	}
   139  
   140  	// Factorize free columns.
   141  	sm := m - nfxd
   142  	sn := n - nfxd
   143  	sminmn := minmn - nfxd
   144  
   145  	// Determine the block size.
   146  	nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1)
   147  	nbmin := 2
   148  	nx := 0
   149  
   150  	if 1 < nb && nb < sminmn {
   151  		// Determine when to cross over from blocked to unblocked code.
   152  		nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1))
   153  
   154  		if nx < sminmn {
   155  			// Determine if workspace is large enough for blocked code.
   156  			minws := 2*sn + (sn+1)*nb
   157  			iws = max(iws, minws)
   158  			if lwork < minws {
   159  				// Not enough workspace to use optimal nb. Reduce
   160  				// nb and determine the minimum value of nb.
   161  				nb = (lwork - 2*sn) / (sn + 1)
   162  				nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1))
   163  			}
   164  		}
   165  	}
   166  
   167  	// Initialize partial column norms.
   168  	// The first n elements of work store the exact column norms.
   169  	for j := nfxd; j < n; j++ {
   170  		work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda)
   171  		work[n+j] = work[j]
   172  	}
   173  	j := nfxd
   174  	if nbmin <= nb && nb < sminmn && nx < sminmn {
   175  		// Use blocked code initially.
   176  
   177  		// Compute factorization.
   178  		var fjb int
   179  		for topbmn := minmn - nx; j < topbmn; j += fjb {
   180  			jb := min(nb, topbmn-j)
   181  
   182  			// Factorize jb columns among columns j:n.
   183  			fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:],
   184  				work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb)
   185  		}
   186  	}
   187  
   188  	// Use unblocked code to factor the last or only block.
   189  	if j < minmn {
   190  		impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:],
   191  			work[j:n], work[j+n:2*n], work[2*n:])
   192  	}
   193  
   194  	work[0] = float64(iws)
   195  }