github.com/gopherd/gonum@v0.0.4/lapack/gonum/dgeqp3.go (about) 1 // Copyright ©2017 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "github.com/gopherd/gonum/blas" 9 "github.com/gopherd/gonum/blas/blas64" 10 ) 11 12 // Dgeqp3 computes a QR factorization with column pivoting of the 13 // m×n matrix A: A*P = Q*R using Level 3 BLAS. 14 // 15 // The matrix Q is represented as a product of elementary reflectors 16 // Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n). 17 // Each H_i has the form 18 // H_i = I - tau * v * vᵀ 19 // where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1; 20 // v[i:m] is stored on exit in A[i:m, i], and tau in tau[i]. 21 // 22 // jpvt specifies a column pivot to be applied to A. If 23 // jpvt[j] is at least zero, the jth column of A is permuted 24 // to the front of A*P (a leading column), if jpvt[j] is -1 25 // the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3 26 // will panic. On return, jpvt holds the permutation that was 27 // applied; the jth column of A*P was the jpvt[j] column of A. 28 // jpvt must have length n or Dgeqp3 will panic. 29 // 30 // tau holds the scalar factors of the elementary reflectors. 31 // It must have length min(m, n), otherwise Dgeqp3 will panic. 32 // 33 // work must have length at least max(1,lwork), and lwork must be at least 34 // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must 35 // be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return, 36 // work[0] will contain the optimal value of lwork. 37 // 38 // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork 39 // will be stored in work[0]. 40 // 41 // Dgeqp3 is an internal routine. It is exported for testing purposes. 42 func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) { 43 const ( 44 inb = 1 45 inbmin = 2 46 ixover = 3 47 ) 48 49 minmn := min(m, n) 50 iws := 3*n + 1 51 if minmn == 0 { 52 iws = 1 53 } 54 switch { 55 case m < 0: 56 panic(mLT0) 57 case n < 0: 58 panic(nLT0) 59 case lda < max(1, n): 60 panic(badLdA) 61 case lwork < iws && lwork != -1: 62 panic(badLWork) 63 case len(work) < max(1, lwork): 64 panic(shortWork) 65 } 66 67 // Quick return if possible. 68 if minmn == 0 { 69 work[0] = 1 70 return 71 } 72 73 nb := impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1) 74 if lwork == -1 { 75 work[0] = float64(2*n + (n+1)*nb) 76 return 77 } 78 79 switch { 80 case len(a) < (m-1)*lda+n: 81 panic(shortA) 82 case len(jpvt) != n: 83 panic(badLenJpvt) 84 case len(tau) < minmn: 85 panic(shortTau) 86 } 87 88 for _, v := range jpvt { 89 if v < -1 || n <= v { 90 panic(badJpvt) 91 } 92 } 93 94 bi := blas64.Implementation() 95 96 // Move initial columns up front. 97 var nfxd int 98 for j := 0; j < n; j++ { 99 if jpvt[j] == -1 { 100 jpvt[j] = j 101 continue 102 } 103 if j != nfxd { 104 bi.Dswap(m, a[j:], lda, a[nfxd:], lda) 105 jpvt[j], jpvt[nfxd] = jpvt[nfxd], j 106 } else { 107 jpvt[j] = j 108 } 109 nfxd++ 110 } 111 112 // Factorize nfxd columns. 113 // 114 // Compute the QR factorization of nfxd columns and update remaining columns. 115 if nfxd > 0 { 116 na := min(m, nfxd) 117 impl.Dgeqrf(m, na, a, lda, tau, work, lwork) 118 iws = max(iws, int(work[0])) 119 if na < n { 120 impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda, 121 work, lwork) 122 iws = max(iws, int(work[0])) 123 } 124 } 125 126 if nfxd >= minmn { 127 work[0] = float64(iws) 128 return 129 } 130 131 // Factorize free columns. 132 sm := m - nfxd 133 sn := n - nfxd 134 sminmn := minmn - nfxd 135 136 // Determine the block size. 137 nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1) 138 nbmin := 2 139 nx := 0 140 141 if 1 < nb && nb < sminmn { 142 // Determine when to cross over from blocked to unblocked code. 143 nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1)) 144 145 if nx < sminmn { 146 // Determine if workspace is large enough for blocked code. 147 minws := 2*sn + (sn+1)*nb 148 iws = max(iws, minws) 149 if lwork < minws { 150 // Not enough workspace to use optimal nb. Reduce 151 // nb and determine the minimum value of nb. 152 nb = (lwork - 2*sn) / (sn + 1) 153 nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1)) 154 } 155 } 156 } 157 158 // Initialize partial column norms. 159 // The first n elements of work store the exact column norms. 160 for j := nfxd; j < n; j++ { 161 work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda) 162 work[n+j] = work[j] 163 } 164 j := nfxd 165 if nbmin <= nb && nb < sminmn && nx < sminmn { 166 // Use blocked code initially. 167 168 // Compute factorization. 169 var fjb int 170 for topbmn := minmn - nx; j < topbmn; j += fjb { 171 jb := min(nb, topbmn-j) 172 173 // Factorize jb columns among columns j:n. 174 fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:], 175 work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb) 176 } 177 } 178 179 // Use unblocked code to factor the last or only block. 180 if j < minmn { 181 impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:], 182 work[j:n], work[j+n:2*n], work[2*n:]) 183 } 184 185 work[0] = float64(iws) 186 }