gonum.org/v1/gonum@v0.14.0/lapack/gonum/dgeqp3.go (about) 1 // Copyright ©2017 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "gonum.org/v1/gonum/blas" 9 "gonum.org/v1/gonum/blas/blas64" 10 ) 11 12 // Dgeqp3 computes a QR factorization with column pivoting of the 13 // m×n matrix A: A*P = Q*R using Level 3 BLAS. 14 // 15 // The matrix Q is represented as a product of elementary reflectors 16 // 17 // Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n). 18 // 19 // Each H_i has the form 20 // 21 // H_i = I - tau * v * vᵀ 22 // 23 // where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1; 24 // v[i:m] is stored on exit in A[i:m, i], and tau in tau[i]. 25 // 26 // jpvt specifies a column pivot to be applied to A. If 27 // jpvt[j] is at least zero, the jth column of A is permuted 28 // to the front of A*P (a leading column), if jpvt[j] is -1 29 // the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3 30 // will panic. On return, jpvt holds the permutation that was 31 // applied; the jth column of A*P was the jpvt[j] column of A. 32 // jpvt must have length n or Dgeqp3 will panic. 33 // 34 // tau holds the scalar factors of the elementary reflectors. 35 // It must have length min(m, n), otherwise Dgeqp3 will panic. 36 // 37 // work must have length at least max(1,lwork), and lwork must be at least 38 // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must 39 // be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return, 40 // work[0] will contain the optimal value of lwork. 41 // 42 // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork 43 // will be stored in work[0]. 44 // 45 // Dgeqp3 is an internal routine. It is exported for testing purposes. 46 func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) { 47 const ( 48 inb = 1 49 inbmin = 2 50 ixover = 3 51 ) 52 53 minmn := min(m, n) 54 iws := 3*n + 1 55 if minmn == 0 { 56 iws = 1 57 } 58 switch { 59 case m < 0: 60 panic(mLT0) 61 case n < 0: 62 panic(nLT0) 63 case lda < max(1, n): 64 panic(badLdA) 65 case lwork < iws && lwork != -1: 66 panic(badLWork) 67 case len(work) < max(1, lwork): 68 panic(shortWork) 69 } 70 71 // Quick return if possible. 72 if minmn == 0 { 73 work[0] = 1 74 return 75 } 76 77 nb := impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1) 78 if lwork == -1 { 79 work[0] = float64(2*n + (n+1)*nb) 80 return 81 } 82 83 switch { 84 case len(a) < (m-1)*lda+n: 85 panic(shortA) 86 case len(jpvt) != n: 87 panic(badLenJpvt) 88 case len(tau) < minmn: 89 panic(shortTau) 90 } 91 92 for _, v := range jpvt { 93 if v < -1 || n <= v { 94 panic(badJpvt) 95 } 96 } 97 98 bi := blas64.Implementation() 99 100 // Move initial columns up front. 101 var nfxd int 102 for j := 0; j < n; j++ { 103 if jpvt[j] == -1 { 104 jpvt[j] = j 105 continue 106 } 107 if j != nfxd { 108 bi.Dswap(m, a[j:], lda, a[nfxd:], lda) 109 jpvt[j], jpvt[nfxd] = jpvt[nfxd], j 110 } else { 111 jpvt[j] = j 112 } 113 nfxd++ 114 } 115 116 // Factorize nfxd columns. 117 // 118 // Compute the QR factorization of nfxd columns and update remaining columns. 119 if nfxd > 0 { 120 na := min(m, nfxd) 121 impl.Dgeqrf(m, na, a, lda, tau, work, lwork) 122 iws = max(iws, int(work[0])) 123 if na < n { 124 impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda, 125 work, lwork) 126 iws = max(iws, int(work[0])) 127 } 128 } 129 130 if nfxd >= minmn { 131 work[0] = float64(iws) 132 return 133 } 134 135 // Factorize free columns. 136 sm := m - nfxd 137 sn := n - nfxd 138 sminmn := minmn - nfxd 139 140 // Determine the block size. 141 nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1) 142 nbmin := 2 143 nx := 0 144 145 if 1 < nb && nb < sminmn { 146 // Determine when to cross over from blocked to unblocked code. 147 nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1)) 148 149 if nx < sminmn { 150 // Determine if workspace is large enough for blocked code. 151 minws := 2*sn + (sn+1)*nb 152 iws = max(iws, minws) 153 if lwork < minws { 154 // Not enough workspace to use optimal nb. Reduce 155 // nb and determine the minimum value of nb. 156 nb = (lwork - 2*sn) / (sn + 1) 157 nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1)) 158 } 159 } 160 } 161 162 // Initialize partial column norms. 163 // The first n elements of work store the exact column norms. 164 for j := nfxd; j < n; j++ { 165 work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda) 166 work[n+j] = work[j] 167 } 168 j := nfxd 169 if nbmin <= nb && nb < sminmn && nx < sminmn { 170 // Use blocked code initially. 171 172 // Compute factorization. 173 var fjb int 174 for topbmn := minmn - nx; j < topbmn; j += fjb { 175 jb := min(nb, topbmn-j) 176 177 // Factorize jb columns among columns j:n. 178 fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:], 179 work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb) 180 } 181 } 182 183 // Use unblocked code to factor the last or only block. 184 if j < minmn { 185 impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:], 186 work[j:n], work[j+n:2*n], work[2*n:]) 187 } 188 189 work[0] = float64(iws) 190 }