gonum.org/v1/gonum@v0.14.0/lapack/gonum/dlaqps.go (about) 1 // Copyright ©2017 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math" 9 10 "gonum.org/v1/gonum/blas" 11 "gonum.org/v1/gonum/blas/blas64" 12 ) 13 14 // Dlaqps computes a step of QR factorization with column pivoting 15 // of an m×n matrix A by using Blas-3. It tries to factorize nb 16 // columns from A starting from the row offset, and updates all 17 // of the matrix with Dgemm. 18 // 19 // In some cases, due to catastrophic cancellations, it cannot 20 // factorize nb columns. Hence, the actual number of factorized 21 // columns is returned in kb. 22 // 23 // Dlaqps computes a QR factorization with column pivoting of the 24 // block A[offset:m, 0:nb] of the m×n matrix A. The block 25 // A[0:offset, 0:n] is accordingly pivoted, but not factorized. 26 // 27 // On exit, the upper triangle of block A[offset:m, 0:kb] is the 28 // triangular factor obtained. The elements in block A[offset:m, 0:n] 29 // below the diagonal, together with tau, represent the orthogonal 30 // matrix Q as a product of elementary reflectors. 31 // 32 // offset is number of rows of the matrix A that must be pivoted but 33 // not factorized. offset must not be negative otherwise Dlaqps will panic. 34 // 35 // On exit, jpvt holds the permutation that was applied; the jth column 36 // of A*P was the jpvt[j] column of A. jpvt must have length n, 37 // otherwise Dlapqs will panic. 38 // 39 // On exit tau holds the scalar factors of the elementary reflectors. 40 // It must have length nb, otherwise Dlapqs will panic. 41 // 42 // vn1 and vn2 hold the partial and complete column norms respectively. 43 // They must have length n, otherwise Dlapqs will panic. 44 // 45 // auxv must have length nb, otherwise Dlaqps will panic. 46 // 47 // f and ldf represent an n×nb matrix F that is overwritten during the 48 // call. 49 // 50 // Dlaqps is an internal routine. It is exported for testing purposes. 51 func (impl Implementation) Dlaqps(m, n, offset, nb int, a []float64, lda int, jpvt []int, tau, vn1, vn2, auxv, f []float64, ldf int) (kb int) { 52 switch { 53 case m < 0: 54 panic(mLT0) 55 case n < 0: 56 panic(nLT0) 57 case offset < 0: 58 panic(offsetLT0) 59 case offset > m: 60 panic(offsetGTM) 61 case nb < 0: 62 panic(nbLT0) 63 case nb > n: 64 panic(nbGTN) 65 case lda < max(1, n): 66 panic(badLdA) 67 case ldf < max(1, nb): 68 panic(badLdF) 69 } 70 71 if m == 0 || n == 0 { 72 return 0 73 } 74 75 switch { 76 case len(a) < (m-1)*lda+n: 77 panic(shortA) 78 case len(jpvt) != n: 79 panic(badLenJpvt) 80 case len(vn1) < n: 81 panic(shortVn1) 82 case len(vn2) < n: 83 panic(shortVn2) 84 } 85 86 if nb == 0 { 87 return 0 88 } 89 90 switch { 91 case len(tau) < nb: 92 panic(shortTau) 93 case len(auxv) < nb: 94 panic(shortAuxv) 95 case len(f) < (n-1)*ldf+nb: 96 panic(shortF) 97 } 98 99 if offset == m { 100 return 0 101 } 102 103 lastrk := min(m, n+offset) 104 lsticc := -1 105 tol3z := math.Sqrt(dlamchE) 106 107 bi := blas64.Implementation() 108 109 var k, rk int 110 for ; k < nb && lsticc == -1; k++ { 111 rk = offset + k 112 113 // Determine kth pivot column and swap if necessary. 114 p := k + bi.Idamax(n-k, vn1[k:], 1) 115 if p != k { 116 bi.Dswap(m, a[p:], lda, a[k:], lda) 117 bi.Dswap(k, f[p*ldf:], 1, f[k*ldf:], 1) 118 jpvt[p], jpvt[k] = jpvt[k], jpvt[p] 119 vn1[p] = vn1[k] 120 vn2[p] = vn2[k] 121 } 122 123 // Apply previous Householder reflectors to column K: 124 // 125 // A[rk:m, k] = A[rk:m, k] - A[rk:m, 0:k-1]*F[k, 0:k-1]ᵀ. 126 if k > 0 { 127 bi.Dgemv(blas.NoTrans, m-rk, k, -1, 128 a[rk*lda:], lda, 129 f[k*ldf:], 1, 130 1, 131 a[rk*lda+k:], lda) 132 } 133 134 // Generate elementary reflector H_k. 135 if rk < m-1 { 136 a[rk*lda+k], tau[k] = impl.Dlarfg(m-rk, a[rk*lda+k], a[(rk+1)*lda+k:], lda) 137 } else { 138 tau[k] = 0 139 } 140 141 akk := a[rk*lda+k] 142 a[rk*lda+k] = 1 143 144 // Compute kth column of F: 145 // 146 // Compute F[k+1:n, k] = tau[k]*A[rk:m, k+1:n]ᵀ*A[rk:m, k]. 147 if k < n-1 { 148 bi.Dgemv(blas.Trans, m-rk, n-k-1, tau[k], 149 a[rk*lda+k+1:], lda, 150 a[rk*lda+k:], lda, 151 0, 152 f[(k+1)*ldf+k:], ldf) 153 } 154 155 // Padding F[0:k, k] with zeros. 156 for j := 0; j < k; j++ { 157 f[j*ldf+k] = 0 158 } 159 160 // Incremental updating of F: 161 // 162 // F[0:n, k] := F[0:n, k] - tau[k]*F[0:n, 0:k-1]*A[rk:m, 0:k-1]ᵀ*A[rk:m,k]. 163 if k > 0 { 164 bi.Dgemv(blas.Trans, m-rk, k, -tau[k], 165 a[rk*lda:], lda, 166 a[rk*lda+k:], lda, 167 0, 168 auxv, 1) 169 bi.Dgemv(blas.NoTrans, n, k, 1, 170 f, ldf, 171 auxv, 1, 172 1, 173 f[k:], ldf) 174 } 175 176 // Update the current row of A: 177 // 178 // A[rk, k+1:n] = A[rk, k+1:n] - A[rk, 0:k]*F[k+1:n, 0:k]ᵀ. 179 if k < n-1 { 180 bi.Dgemv(blas.NoTrans, n-k-1, k+1, -1, 181 f[(k+1)*ldf:], ldf, 182 a[rk*lda:], 1, 183 1, 184 a[rk*lda+k+1:], 1) 185 } 186 187 // Update partial column norms. 188 if rk < lastrk-1 { 189 for j := k + 1; j < n; j++ { 190 if vn1[j] == 0 { 191 continue 192 } 193 194 // The following marked lines follow from the 195 // analysis in Lapack Working Note 176. 196 r := math.Abs(a[rk*lda+j]) / vn1[j] // * 197 temp := math.Max(0, 1-r*r) // * 198 r = vn1[j] / vn2[j] // * 199 temp2 := temp * r * r // * 200 if temp2 < tol3z { 201 // vn2 is used here as a collection of 202 // indices into vn2 and also a collection 203 // of column norms. 204 vn2[j] = float64(lsticc) 205 lsticc = j 206 } else { 207 vn1[j] *= math.Sqrt(temp) // * 208 } 209 } 210 } 211 212 a[rk*lda+k] = akk 213 } 214 kb = k 215 rk = offset + kb 216 217 // Apply the block reflector to the rest of the matrix: 218 // 219 // A[offset+kb+1:m, kb+1:n] := A[offset+kb+1:m, kb+1:n] - A[offset+kb+1:m, 1:kb]*F[kb+1:n, 1:kb]ᵀ. 220 if kb < min(n, m-offset) { 221 bi.Dgemm(blas.NoTrans, blas.Trans, 222 m-rk, n-kb, kb, -1, 223 a[rk*lda:], lda, 224 f[kb*ldf:], ldf, 225 1, 226 a[rk*lda+kb:], lda) 227 } 228 229 // Recomputation of difficult columns. 230 for lsticc >= 0 { 231 itemp := int(vn2[lsticc]) 232 233 // NOTE: The computation of vn1[lsticc] relies on the fact that 234 // Dnrm2 does not fail on vectors with norm below the value of 235 // sqrt(dlamchS) 236 v := bi.Dnrm2(m-rk, a[rk*lda+lsticc:], lda) 237 vn1[lsticc] = v 238 vn2[lsticc] = v 239 240 lsticc = itemp 241 } 242 243 return kb 244 }