gonum.org/v1/gonum@v0.14.0/lapack/gonum/dlatdf.go (about) 1 // Copyright ©2021 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math" 9 10 "gonum.org/v1/gonum/blas/blas64" 11 "gonum.org/v1/gonum/lapack" 12 ) 13 14 // Dlatdf computes a contribution to the reciprocal Dif-estimate by solving 15 // 16 // Z * x = h - f 17 // 18 // and choosing the vector h such that the norm of x is as large as possible. 19 // 20 // The n×n matrix Z is represented by its LU factorization as computed by Dgetc2 21 // and has the form 22 // 23 // Z = P * L * U * Q 24 // 25 // where P and Q are permutation matrices, L is lower triangular with unit 26 // diagonal elements and U is upper triangular. 27 // 28 // job specifies the heuristic method for computing the contribution. 29 // 30 // If job is lapack.LocalLookAhead, all entries of h are chosen as either +1 or 31 // -1. 32 // 33 // If job is lapack.NormalizedNullVector, an approximate null-vector e of Z is 34 // computed using Dgecon and normalized. h is chosen as ±e with the sign giving 35 // the greater value of 2-norm(x). This strategy is about 5 times as expensive 36 // as LocalLookAhead. 37 // 38 // On entry, rhs holds the contribution f from earlier solved sub-systems. On 39 // return, rhs holds the solution x. 40 // 41 // ipiv and jpiv contain the pivot indices as returned by Dgetc2: row i of the 42 // matrix has been interchanged with row ipiv[i] and column j of the matrix has 43 // been interchanged with column jpiv[j]. 44 // 45 // n must be at most 8, ipiv and jpiv must have length n, and rhs must have 46 // length at least n, otherwise Dlatdf will panic. 47 // 48 // rdsum and rdscal represent the sum of squares of computed contributions to 49 // the Dif-estimate from earlier solved sub-systems. rdscal is the scaling 50 // factor used to prevent overflow in rdsum. Dlatdf returns this sum of squares 51 // updated with the contributions from the current sub-system. 52 // 53 // Dlatdf is an internal routine. It is exported for testing purposes. 54 func (impl Implementation) Dlatdf(job lapack.MaximizeNormXJob, n int, z []float64, ldz int, rhs []float64, rdsum, rdscal float64, ipiv, jpiv []int) (scale, sum float64) { 55 switch { 56 case job != lapack.LocalLookAhead && job != lapack.NormalizedNullVector: 57 panic(badMaximizeNormXJob) 58 case n < 0: 59 panic(nLT0) 60 case n > 8: 61 panic("lapack: n > 8") 62 case ldz < max(1, n): 63 panic(badLdZ) 64 } 65 66 // Quick return if possible. 67 if n == 0 { 68 return 69 } 70 71 switch { 72 case len(z) < (n-1)*ldz+n: 73 panic(shortZ) 74 case len(rhs) < n: 75 panic(shortRHS) 76 case len(ipiv) != n: 77 panic(badLenIpiv) 78 case len(jpiv) != n: 79 panic(badLenJpiv) 80 } 81 82 const maxdim = 8 83 var ( 84 xps [maxdim]float64 85 xms [maxdim]float64 86 work [4 * maxdim]float64 87 iwork [maxdim]int 88 ) 89 bi := blas64.Implementation() 90 xp := xps[:n] 91 xm := xms[:n] 92 if job == lapack.NormalizedNullVector { 93 // Compute approximate nullvector xm of Z. 94 _ = impl.Dgecon(lapack.MaxRowSum, n, z, ldz, 1, work[:], iwork[:]) 95 // This relies on undocumented content in work[n:2*n] stored by Dgecon. 96 bi.Dcopy(n, work[n:], 1, xm, 1) 97 98 // Compute rhs. 99 impl.Dlaswp(1, xm, 1, 0, n-2, ipiv[:n-1], -1) 100 tmp := 1 / bi.Dnrm2(n, xm, 1) 101 bi.Dscal(n, tmp, xm, 1) 102 bi.Dcopy(n, xm, 1, xp, 1) 103 bi.Daxpy(n, 1, rhs, 1, xp, 1) 104 bi.Daxpy(n, -1.0, xm, 1, rhs, 1) 105 _ = impl.Dgesc2(n, z, ldz, rhs, ipiv, jpiv) 106 _ = impl.Dgesc2(n, z, ldz, xp, ipiv, jpiv) 107 if bi.Dasum(n, xp, 1) > bi.Dasum(n, rhs, 1) { 108 bi.Dcopy(n, xp, 1, rhs, 1) 109 } 110 111 // Compute and return the updated sum of squares. 112 return impl.Dlassq(n, rhs, 1, rdscal, rdsum) 113 } 114 115 // Apply permutations ipiv to rhs 116 impl.Dlaswp(1, rhs, 1, 0, n-2, ipiv[:n-1], 1) 117 118 // Solve for L-part choosing rhs either to +1 or -1. 119 pmone := -1.0 120 for j := 0; j < n-2; j++ { 121 bp := rhs[j] + 1 122 bm := rhs[j] - 1 123 124 // Look-ahead for L-part rhs[0:n-2] = +1 or -1, splus and sminu computed 125 // more efficiently than in https://doi.org/10.1109/9.29404. 126 splus := 1 + bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, z[(j+1)*ldz+j:], ldz) 127 sminu := bi.Ddot(n-j-1, z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1) 128 splus *= rhs[j] 129 switch { 130 case splus > sminu: 131 rhs[j] = bp 132 case sminu > splus: 133 rhs[j] = bm 134 default: 135 // In this case the updating sums are equal and we can choose rsh[j] 136 // +1 or -1. The first time this happens we choose -1, thereafter 137 // +1. This is a simple way to get good estimates of matrices like 138 // Byers well-known example (see https://doi.org/10.1109/9.29404). 139 rhs[j] += pmone 140 pmone = 1 141 } 142 143 // Compute remaining rhs. 144 bi.Daxpy(n-j-1, -rhs[j], z[(j+1)*ldz+j:], ldz, rhs[j+1:], 1) 145 } 146 147 // Solve for U-part, look-ahead for rhs[n-1] = ±1. This is not done in 148 // Bsolve and will hopefully give us a better estimate because any 149 // ill-conditioning of the original matrix is transferred to U and not to L. 150 // U[n-1,n-1] is an approximation to sigma_min(LU). 151 bi.Dcopy(n-1, rhs, 1, xp, 1) 152 xp[n-1] = rhs[n-1] + 1 153 rhs[n-1] -= 1 154 var splus, sminu float64 155 for i := n - 1; i >= 0; i-- { 156 tmp := 1 / z[i*ldz+i] 157 xp[i] *= tmp 158 rhs[i] *= tmp 159 for k := i + 1; k < n; k++ { 160 xp[i] -= xp[k] * (z[i*ldz+k] * tmp) 161 rhs[i] -= rhs[k] * (z[i*ldz+k] * tmp) 162 } 163 splus += math.Abs(xp[i]) 164 sminu += math.Abs(rhs[i]) 165 } 166 if splus > sminu { 167 bi.Dcopy(n, xp, 1, rhs, 1) 168 } 169 170 // Apply the permutations jpiv to the computed solution (rhs). 171 impl.Dlaswp(1, rhs, 1, 0, n-2, jpiv[:n-1], -1) 172 173 // Compute and return the updated sum of squares. 174 return impl.Dlassq(n, rhs, 1, rdscal, rdsum) 175 }