github.com/gopherd/gonum@v0.0.4/lapack/gonum/dpbtrf.go (about) 1 // Copyright ©2019 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "github.com/gopherd/gonum/blas" 9 "github.com/gopherd/gonum/blas/blas64" 10 ) 11 12 // Dpbtrf computes the Cholesky factorization of an n×n symmetric positive 13 // definite band matrix 14 // A = Uᵀ * U if uplo == blas.Upper 15 // A = L * Lᵀ if uplo == blas.Lower 16 // where U is an upper triangular band matrix and L is lower triangular. kd is 17 // the number of super- or sub-diagonals of A. 18 // 19 // The band storage scheme is illustrated below when n = 6 and kd = 2. Elements 20 // marked * are not used by the function. 21 // 22 // uplo == blas.Upper 23 // On entry: On return: 24 // a00 a01 a02 u00 u01 u02 25 // a11 a12 a13 u11 u12 u13 26 // a22 a23 a24 u22 u23 u24 27 // a33 a34 a35 u33 u34 u35 28 // a44 a45 * u44 u45 * 29 // a55 * * u55 * * 30 // 31 // uplo == blas.Lower 32 // On entry: On return: 33 // * * a00 * * l00 34 // * a10 a11 * l10 l11 35 // a20 a21 a22 l20 l21 l22 36 // a31 a32 a33 l31 l32 l33 37 // a42 a43 a44 l42 l43 l44 38 // a53 a54 a55 l53 l54 l55 39 func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) { 40 const nbmax = 32 41 42 switch { 43 case uplo != blas.Upper && uplo != blas.Lower: 44 panic(badUplo) 45 case n < 0: 46 panic(nLT0) 47 case kd < 0: 48 panic(kdLT0) 49 case ldab < kd+1: 50 panic(badLdA) 51 } 52 53 // Quick return if possible. 54 if n == 0 { 55 return true 56 } 57 58 if len(ab) < (n-1)*ldab+kd+1 { 59 panic(shortAB) 60 } 61 62 opts := string(blas.Upper) 63 if uplo == blas.Lower { 64 opts = string(blas.Lower) 65 } 66 nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1) 67 // The block size must not exceed the semi-bandwidth kd, and must not 68 // exceed the limit set by the size of the local array work. 69 nb = min(nb, nbmax) 70 71 if nb <= 1 || kd < nb { 72 // Use unblocked code. 73 return impl.Dpbtf2(uplo, n, kd, ab, ldab) 74 } 75 76 // Use blocked code. 77 ldwork := nb 78 work := make([]float64, nb*ldwork) 79 bi := blas64.Implementation() 80 if uplo == blas.Upper { 81 // Compute the Cholesky factorization of a symmetric band 82 // matrix, given the upper triangle of the matrix in band 83 // storage. 84 85 // Process the band matrix one diagonal block at a time. 86 for i := 0; i < n; i += nb { 87 ib := min(nb, n-i) 88 // Factorize the diagonal block. 89 ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1) 90 if !ok { 91 return false 92 } 93 if i+ib >= n { 94 continue 95 } 96 // Update the relevant part of the trailing submatrix. 97 // If A11 denotes the diagonal block which has just been 98 // factorized, then we need to update the remaining 99 // blocks in the diagram: 100 // 101 // A11 A12 A13 102 // A22 A23 103 // A33 104 // 105 // The numbers of rows and columns in the partitioning 106 // are ib, i2, i3 respectively. The blocks A12, A22 and 107 // A23 are empty if ib = kd. The upper triangle of A13 108 // lies outside the band. 109 i2 := min(kd-ib, n-i-ib) 110 if i2 > 0 { 111 // Update A12. 112 bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2, 113 1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1) 114 // Update A22. 115 bi.Dsyrk(blas.Upper, blas.Trans, i2, ib, 116 -1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1) 117 } 118 i3 := min(ib, n-i-kd) 119 if i3 > 0 { 120 // Copy the lower triangle of A13 into the work array. 121 for ii := 0; ii < ib; ii++ { 122 for jj := 0; jj <= min(ii, i3-1); jj++ { 123 work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj] 124 } 125 } 126 // Update A13 (in the work array). 127 bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3, 128 1, ab[i*ldab:], ldab-1, work, ldwork) 129 // Update A23. 130 if i2 > 0 { 131 bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib, 132 -1, ab[i*ldab+ib:], ldab-1, work, ldwork, 133 1, ab[(i+ib)*ldab+kd-ib:], ldab-1) 134 } 135 // Update A33. 136 bi.Dsyrk(blas.Upper, blas.Trans, i3, ib, 137 -1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1) 138 // Copy the lower triangle of A13 back into place. 139 for ii := 0; ii < ib; ii++ { 140 for jj := 0; jj <= min(ii, i3-1); jj++ { 141 ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj] 142 } 143 } 144 } 145 } 146 } else { 147 // Compute the Cholesky factorization of a symmetric band 148 // matrix, given the lower triangle of the matrix in band 149 // storage. 150 151 // Process the band matrix one diagonal block at a time. 152 for i := 0; i < n; i += nb { 153 ib := min(nb, n-i) 154 // Factorize the diagonal block. 155 ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1) 156 if !ok { 157 return false 158 } 159 if i+ib >= n { 160 continue 161 } 162 // Update the relevant part of the trailing submatrix. 163 // If A11 denotes the diagonal block which has just been 164 // factorized, then we need to update the remaining 165 // blocks in the diagram: 166 // 167 // A11 168 // A21 A22 169 // A31 A32 A33 170 // 171 // The numbers of rows and columns in the partitioning 172 // are ib, i2, i3 respectively. The blocks A21, A22 and 173 // A32 are empty if ib = kd. The lowr triangle of A31 174 // lies outside the band. 175 i2 := min(kd-ib, n-i-ib) 176 if i2 > 0 { 177 // Update A21. 178 bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib, 179 1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1) 180 // Update A22. 181 bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib, 182 -1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1) 183 } 184 i3 := min(ib, n-i-kd) 185 if i3 > 0 { 186 // Copy the upper triangle of A31 into the work array. 187 for ii := 0; ii < i3; ii++ { 188 for jj := ii; jj < ib; jj++ { 189 work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii] 190 } 191 } 192 // Update A31 (in the work array). 193 bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib, 194 1, ab[i*ldab+kd:], ldab-1, work, ldwork) 195 // Update A32. 196 if i2 > 0 { 197 bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib, 198 -1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1, 199 1, ab[(i+kd)*ldab+ib:], ldab-1) 200 } 201 // Update A33. 202 bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib, 203 -1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1) 204 // Copy the upper triangle of A31 back into place. 205 for ii := 0; ii < i3; ii++ { 206 for jj := ii; jj < ib; jj++ { 207 ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj] 208 } 209 } 210 } 211 } 212 } 213 return true 214 }