github.com/gonum/lapack@v0.0.0-20181123203213-e4cdc5a0bff9/native/dgeqp3.go (about) 1 // Copyright ©2017 The gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package native 6 7 import ( 8 "github.com/gonum/blas" 9 "github.com/gonum/blas/blas64" 10 ) 11 12 // Dgeqp3 computes a QR factorization with column pivoting of the 13 // m×n matrix A: A*P = Q*R using Level 3 BLAS. 14 // 15 // The matrix Q is represented as a product of elementary reflectors 16 // Q = H_0 H_1 . . . H_{k-1}, where k = min(m,n). 17 // Each H_i has the form 18 // H_i = I - tau * v * v^T 19 // where tau and v are real vectors with v[0:i-1] = 0 and v[i] = 1; 20 // v[i:m] is stored on exit in A[i:m, i], and tau in tau[i]. 21 // 22 // jpvt specifies a column pivot to be applied to A. If 23 // jpvt[j] is at least zero, the jth column of A is permuted 24 // to the front of A*P (a leading column), if jpvt[j] is -1 25 // the jth column of A is a free column. If jpvt[j] < -1, Dgeqp3 26 // will panic. On return, jpvt holds the permutation that was 27 // applied; the jth column of A*P was the jpvt[j] column of A. 28 // jpvt must have length n or Dgeqp3 will panic. 29 // 30 // tau holds the scalar factors of the elementary reflectors. 31 // It must have length min(m, n), otherwise Dgeqp3 will panic. 32 // 33 // work must have length at least max(1,lwork), and lwork must be at least 34 // 3*n+1, otherwise Dgeqp3 will panic. For optimal performance lwork must 35 // be at least 2*n+(n+1)*nb, where nb is the optimal blocksize. On return, 36 // work[0] will contain the optimal value of lwork. 37 // 38 // If lwork == -1, instead of performing Dgeqp3, only the optimal value of lwork 39 // will be stored in work[0]. 40 // 41 // Dgeqp3 is an internal routine. It is exported for testing purposes. 42 func (impl Implementation) Dgeqp3(m, n int, a []float64, lda int, jpvt []int, tau, work []float64, lwork int) { 43 const ( 44 inb = 1 45 inbmin = 2 46 ixover = 3 47 ) 48 checkMatrix(m, n, a, lda) 49 50 if len(jpvt) != n { 51 panic(badIpiv) 52 } 53 for _, v := range jpvt { 54 if v < -1 || n <= v { 55 panic("lapack: jpvt element out of range") 56 } 57 } 58 minmn := min(m, n) 59 if len(work) < max(1, lwork) { 60 panic(badWork) 61 } 62 63 var iws, lwkopt, nb int 64 if minmn == 0 { 65 iws = 1 66 lwkopt = 1 67 } else { 68 iws = 3*n + 1 69 nb = impl.Ilaenv(inb, "DGEQRF", " ", m, n, -1, -1) 70 lwkopt = 2*n + (n+1)*nb 71 } 72 work[0] = float64(lwkopt) 73 74 if lwork == -1 { 75 return 76 } 77 78 if len(tau) < minmn { 79 panic(badTau) 80 } 81 82 bi := blas64.Implementation() 83 84 // Move initial columns up front. 85 var nfxd int 86 for j := 0; j < n; j++ { 87 if jpvt[j] == -1 { 88 jpvt[j] = j 89 continue 90 } 91 if j != nfxd { 92 bi.Dswap(m, a[j:], lda, a[nfxd:], lda) 93 jpvt[j], jpvt[nfxd] = jpvt[nfxd], j 94 } else { 95 jpvt[j] = j 96 } 97 nfxd++ 98 } 99 100 // Factorize nfxd columns. 101 // 102 // Compute the QR factorization of nfxd columns and update remaining columns. 103 if nfxd > 0 { 104 na := min(m, nfxd) 105 impl.Dgeqrf(m, na, a, lda, tau, work, lwork) 106 iws = max(iws, int(work[0])) 107 if na < n { 108 impl.Dormqr(blas.Left, blas.Trans, m, n-na, na, a, lda, tau[:na], a[na:], lda, 109 work, lwork) 110 iws = max(iws, int(work[0])) 111 } 112 } 113 114 if nfxd >= minmn { 115 work[0] = float64(iws) 116 return 117 } 118 119 // Factorize free columns. 120 sm := m - nfxd 121 sn := n - nfxd 122 sminmn := minmn - nfxd 123 124 // Determine the block size. 125 nb = impl.Ilaenv(inb, "DGEQRF", " ", sm, sn, -1, -1) 126 nbmin := 2 127 nx := 0 128 129 if 1 < nb && nb < sminmn { 130 // Determine when to cross over from blocked to unblocked code. 131 nx = max(0, impl.Ilaenv(ixover, "DGEQRF", " ", sm, sn, -1, -1)) 132 133 if nx < sminmn { 134 // Determine if workspace is large enough for blocked code. 135 minws := 2*sn + (sn+1)*nb 136 iws = max(iws, minws) 137 if lwork < minws { 138 // Not enough workspace to use optimal nb. Reduce 139 // nb and determine the minimum value of nb. 140 nb = (lwork - 2*sn) / (sn + 1) 141 nbmin = max(2, impl.Ilaenv(inbmin, "DGEQRF", " ", sm, sn, -1, -1)) 142 } 143 } 144 } 145 146 // Initialize partial column norms. 147 // The first n elements of work store the exact column norms. 148 for j := nfxd; j < n; j++ { 149 work[j] = bi.Dnrm2(sm, a[nfxd*lda+j:], lda) 150 work[n+j] = work[j] 151 } 152 j := nfxd 153 if nbmin <= nb && nb < sminmn && nx < sminmn { 154 // Use blocked code initially. 155 156 // Compute factorization. 157 var fjb int 158 for topbmn := minmn - nx; j < topbmn; j += fjb { 159 jb := min(nb, topbmn-j) 160 161 // Factorize jb columns among columns j:n. 162 fjb = impl.Dlaqps(m, n-j, j, jb, a[j:], lda, jpvt[j:], tau[j:], 163 work[j:n], work[j+n:2*n], work[2*n:2*n+jb], work[2*n+jb:], jb) 164 } 165 } 166 167 // Use unblocked code to factor the last or only block. 168 if j < minmn { 169 impl.Dlaqp2(m, n-j, j, a[j:], lda, jpvt[j:], tau[j:], 170 work[j:n], work[j+n:2*n], work[2*n:]) 171 } 172 173 work[0] = float64(iws) 174 }