gonum.org/v1/gonum@v0.15.1-0.20240517103525-f853624cb1bb/lapack/gonum/dgeqp3.go (about) 1 // Copyright ©2017 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "gonum.org/v1/gonum/blas" 9 "gonum.org/v1/gonum/blas/blas64" 10 ) 11 12 // Dgeqp3 computes a QR factorization with column pivoting of the m×n matrix A: 13 // 14 // A*P = Q*R 15 // 16 // where P is a permutation matrix, Q is an orthogonal matrix and R is a 17 // min(m,n)×n upper trapezoidal matrix. 18 // 19 // On return, the upper triangle of A contains the matrix R. The elements below 20 // the diagonal together with tau represent the matrix Q as a product of 21 // elementary reflectors 22 // 23 // Q = H_0 * H_1 * ... * H_{k-1}, where k = min(m,n). 24 // 25 // Each H_i has the form 26 // 27 // H_i = I - tau * v * vᵀ 28 // 29 // where tau is a scalar and v is a vector with v[0:i] = 0 and v[i] = 1; 30 // v[i+1:m] is stored on exit in A[i+1:m,i], and tau in tau[i]. 31 // 32 // jpvt specifies a column pivot to be applied to A. On entry, if jpvt[j] is at 33 // least zero, the jth column of A is permuted to the front of A*P (a leading 34 // column), if jpvt[j] is -1 the jth column of A is a free column. If jpvt[j] < 35 // -1, Dgeqp3 will panic. On return, jpvt holds the permutation that was 36 // applied; the jth column of A*P was the jpvt[j] column of A. jpvt must have 37 // length n or Dgeqp3 will panic. 38 // 39 // tau holds the scalar factors of the elementary reflectors. It must have 40 // length min(m,n), otherwise Dgeqp3 will panic. 41 // 42 // work must have length at least max(1,lwork), and lwork must be at least 43 // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must be at 44 // least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return, work[0] 45 // will contain the optimal value of lwork. 46 // 47 // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork 48 // will be stored in work[0]. 49 // 50 // Dgeqp3 is an internal routine. It is exported for testing purposes. 51 func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) { 52 const ( 53 inb = 1 54 inbmin = 2 55 ixover = 3 56 ) 57 58 minmn := min(m, n) 59 iws := 3*n + 1 60 if minmn == 0 { 61 iws = 1 62 } 63 switch { 64 case m < 0: 65 panic(mLT0) 66 case n < 0: 67 panic(nLT0) 68 case lda < max(1, n): 69 panic(badLdA) 70 case lwork < iws && lwork != -1: 71 panic(badLWork) 72 case len(work) < max(1, lwork): 73 panic(shortWork) 74 } 75 76 // Quick return if possible. 77 if minmn == 0 { 78 work[0] = 1 79 return 80 } 81 82 nb := impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1) 83 if lwork == -1 { 84 work[0] = float64(2*n + (n+1)*nb) 85 return 86 } 87 88 switch { 89 case len(a) < (m-1)*lda+n: 90 panic(shortA) 91 case len(jpvt) != n: 92 panic(badLenJpvt) 93 case len(tau) < minmn: 94 panic(shortTau) 95 } 96 97 for _, v := range jpvt { 98 if v < -1 || n <= v { 99 panic(badJpvt) 100 } 101 } 102 103 bi := blas64.Implementation() 104 105 // Move initial columns up front. 106 var nfxd int 107 for j := 0; j < n; j++ { 108 if jpvt[j] == -1 { 109 jpvt[j] = j 110 continue 111 } 112 if j != nfxd { 113 bi.Dswap(m, a[j:], lda, a[nfxd:], lda) 114 jpvt[j], jpvt[nfxd] = jpvt[nfxd], j 115 } else { 116 jpvt[j] = j 117 } 118 nfxd++ 119 } 120 121 // Factorize nfxd columns. 122 // 123 // Compute the QR factorization of nfxd columns and update remaining columns. 124 if nfxd > 0 { 125 na := min(m, nfxd) 126 impl.Dgeqrf(m, na, a, lda, tau[:na], work, lwork) 127 iws = max(iws, int(work[0])) 128 if na < n { 129 impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda, 130 work, lwork) 131 iws = max(iws, int(work[0])) 132 } 133 } 134 135 if nfxd >= minmn { 136 work[0] = float64(iws) 137 return 138 } 139 140 // Factorize free columns. 141 sm := m - nfxd 142 sn := n - nfxd 143 sminmn := minmn - nfxd 144 145 // Determine the block size. 146 nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1) 147 nbmin := 2 148 nx := 0 149 150 if 1 < nb && nb < sminmn { 151 // Determine when to cross over from blocked to unblocked code. 152 nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1)) 153 154 if nx < sminmn { 155 // Determine if workspace is large enough for blocked code. 156 minws := 2*sn + (sn+1)*nb 157 iws = max(iws, minws) 158 if lwork < minws { 159 // Not enough workspace to use optimal nb. Reduce 160 // nb and determine the minimum value of nb. 161 nb = (lwork - 2*sn) / (sn + 1) 162 nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1)) 163 } 164 } 165 } 166 167 // Initialize partial column norms. 168 // The first n elements of work store the exact column norms. 169 for j := nfxd; j < n; j++ { 170 work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda) 171 work[n+j] = work[j] 172 } 173 j := nfxd 174 if nbmin <= nb && nb < sminmn && nx < sminmn { 175 // Use blocked code initially. 176 177 // Compute factorization. 178 var fjb int 179 for topbmn := minmn - nx; j < topbmn; j += fjb { 180 jb := min(nb, topbmn-j) 181 182 // Factorize jb columns among columns j:n. 183 fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:], 184 work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb) 185 } 186 } 187 188 // Use unblocked code to factor the last or only block. 189 if j < minmn { 190 impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:], 191 work[j:n], work[j+n:2*n], work[2*n:]) 192 } 193 194 work[0] = float64(iws) 195 }