github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dgeqp3.go (about)

     1  // Copyright ©2017 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"github.com/jingcheng-WU/gonum/blas"
     9  	"github.com/jingcheng-WU/gonum/blas/blas64"
    10  )
    11  
    12  // Dgeqp3 computes a QR factorization with column pivoting of the
    13  // m×n matrix A: A*P = Q*R using Level 3 BLAS.
    14  //
    15  // The matrix Q is represented as a product of elementary reflectors
    16  //  Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n).
    17  // Each H_i has the form
    18  //  H_i = I - tau * v * vᵀ
    19  // where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1;
    20  // v[i:m] is stored on exit in A[i:m, i], and tau in tau[i].
    21  //
    22  // jpvt specifies a column pivot to be applied to A. If
    23  // jpvt[j] is at least zero, the jth column of A is permuted
    24  // to the front of A*P (a leading column), if jpvt[j] is -1
    25  // the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3
    26  // will panic. On return, jpvt holds the permutation that was
    27  // applied; the jth column of A*P was the jpvt[j] column of A.
    28  // jpvt must have length n or Dgeqp3 will panic.
    29  //
    30  // tau holds the scalar factors of the elementary reflectors.
    31  // It must have length min(m, n), otherwise Dgeqp3 will panic.
    32  //
    33  // work must have length at least max(1,lwork), and lwork must be at least
    34  // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must
    35  // be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return,
    36  // work[0] will contain the optimal value of lwork.
    37  //
    38  // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork
    39  // will be stored in work[0].
    40  //
    41  // Dgeqp3 is an internal routine. It is exported for testing purposes.
    42  func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) {
    43  	const (
    44  		inb    = 1
    45  		inbmin = 2
    46  		ixover = 3
    47  	)
    48  
    49  	minmn := min(m, n)
    50  	iws := 3*n + 1
    51  	if minmn == 0 {
    52  		iws = 1
    53  	}
    54  	switch {
    55  	case m < 0:
    56  		panic(mLT0)
    57  	case n < 0:
    58  		panic(nLT0)
    59  	case lda < max(1, n):
    60  		panic(badLdA)
    61  	case lwork < iws && lwork != -1:
    62  		panic(badLWork)
    63  	case len(work) < max(1, lwork):
    64  		panic(shortWork)
    65  	}
    66  
    67  	// Quick return if possible.
    68  	if minmn == 0 {
    69  		work[0] = 1
    70  		return
    71  	}
    72  
    73  	nb := impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1)
    74  	if lwork == -1 {
    75  		work[0] = float64(2*n + (n+1)*nb)
    76  		return
    77  	}
    78  
    79  	switch {
    80  	case len(a) < (m-1)*lda+n:
    81  		panic(shortA)
    82  	case len(jpvt) != n:
    83  		panic(badLenJpvt)
    84  	case len(tau) < minmn:
    85  		panic(shortTau)
    86  	}
    87  
    88  	for _, v := range jpvt {
    89  		if v < -1 || n <= v {
    90  			panic(badJpvt)
    91  		}
    92  	}
    93  
    94  	bi := blas64.Implementation()
    95  
    96  	// Move initial columns up front.
    97  	var nfxd int
    98  	for j := 0; j < n; j++ {
    99  		if jpvt[j] == -1 {
   100  			jpvt[j] = j
   101  			continue
   102  		}
   103  		if j != nfxd {
   104  			bi.Dswap(m, a[j:], lda, a[nfxd:], lda)
   105  			jpvt[j], jpvt[nfxd] = jpvt[nfxd], j
   106  		} else {
   107  			jpvt[j] = j
   108  		}
   109  		nfxd++
   110  	}
   111  
   112  	// Factorize nfxd columns.
   113  	//
   114  	// Compute the QR factorization of nfxd columns and update remaining columns.
   115  	if nfxd > 0 {
   116  		na := min(m, nfxd)
   117  		impl.Dgeqrf(m, na, a, lda, tau, work, lwork)
   118  		iws = max(iws, int(work[0]))
   119  		if na < n {
   120  			impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda,
   121  				work, lwork)
   122  			iws = max(iws, int(work[0]))
   123  		}
   124  	}
   125  
   126  	if nfxd >= minmn {
   127  		work[0] = float64(iws)
   128  		return
   129  	}
   130  
   131  	// Factorize free columns.
   132  	sm := m - nfxd
   133  	sn := n - nfxd
   134  	sminmn := minmn - nfxd
   135  
   136  	// Determine the block size.
   137  	nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1)
   138  	nbmin := 2
   139  	nx := 0
   140  
   141  	if 1 < nb && nb < sminmn {
   142  		// Determine when to cross over from blocked to unblocked code.
   143  		nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1))
   144  
   145  		if nx < sminmn {
   146  			// Determine if workspace is large enough for blocked code.
   147  			minws := 2*sn + (sn+1)*nb
   148  			iws = max(iws, minws)
   149  			if lwork < minws {
   150  				// Not enough workspace to use optimal nb. Reduce
   151  				// nb and determine the minimum value of nb.
   152  				nb = (lwork - 2*sn) / (sn + 1)
   153  				nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1))
   154  			}
   155  		}
   156  	}
   157  
   158  	// Initialize partial column norms.
   159  	// The first n elements of work store the exact column norms.
   160  	for j := nfxd; j < n; j++ {
   161  		work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda)
   162  		work[n+j] = work[j]
   163  	}
   164  	j := nfxd
   165  	if nbmin <= nb && nb < sminmn && nx < sminmn {
   166  		// Use blocked code initially.
   167  
   168  		// Compute factorization.
   169  		var fjb int
   170  		for topbmn := minmn - nx; j < topbmn; j += fjb {
   171  			jb := min(nb, topbmn-j)
   172  
   173  			// Factorize jb columns among columns j:n.
   174  			fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:],
   175  				work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb)
   176  		}
   177  	}
   178  
   179  	// Use unblocked code to factor the last or only block.
   180  	if j < minmn {
   181  		impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:],
   182  			work[j:n], work[j+n:2*n], work[2*n:])
   183  	}
   184  
   185  	work[0] = float64(iws)
   186  }