gonum.org/v1/gonum@v0.14.0/lapack/gonum/dpbtrf.go (about) 1 // Copyright ©2019 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "gonum.org/v1/gonum/blas" 9 "gonum.org/v1/gonum/blas/blas64" 10 ) 11 12 // Dpbtrf computes the Cholesky factorization of an n×n symmetric positive 13 // definite band matrix 14 // 15 // A = Uᵀ * U if uplo == blas.Upper 16 // A = L * Lᵀ if uplo == blas.Lower 17 // 18 // where U is an upper triangular band matrix and L is lower triangular. kd is 19 // the number of super- or sub-diagonals of A. 20 // 21 // The band storage scheme is illustrated below when n = 6 and kd = 2. Elements 22 // marked * are not used by the function. 23 // 24 // uplo == blas.Upper 25 // On entry: On return: 26 // a00 a01 a02 u00 u01 u02 27 // a11 a12 a13 u11 u12 u13 28 // a22 a23 a24 u22 u23 u24 29 // a33 a34 a35 u33 u34 u35 30 // a44 a45 * u44 u45 * 31 // a55 * * u55 * * 32 // 33 // uplo == blas.Lower 34 // On entry: On return: 35 // * * a00 * * l00 36 // * a10 a11 * l10 l11 37 // a20 a21 a22 l20 l21 l22 38 // a31 a32 a33 l31 l32 l33 39 // a42 a43 a44 l42 l43 l44 40 // a53 a54 a55 l53 l54 l55 41 func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) { 42 const nbmax = 32 43 44 switch { 45 case uplo != blas.Upper && uplo != blas.Lower: 46 panic(badUplo) 47 case n < 0: 48 panic(nLT0) 49 case kd < 0: 50 panic(kdLT0) 51 case ldab < kd+1: 52 panic(badLdA) 53 } 54 55 // Quick return if possible. 56 if n == 0 { 57 return true 58 } 59 60 if len(ab) < (n-1)*ldab+kd+1 { 61 panic(shortAB) 62 } 63 64 opts := string(blas.Upper) 65 if uplo == blas.Lower { 66 opts = string(blas.Lower) 67 } 68 nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1) 69 // The block size must not exceed the semi-bandwidth kd, and must not 70 // exceed the limit set by the size of the local array work. 71 nb = min(nb, nbmax) 72 73 if nb <= 1 || kd < nb { 74 // Use unblocked code. 75 return impl.Dpbtf2(uplo, n, kd, ab, ldab) 76 } 77 78 // Use blocked code. 79 ldwork := nb 80 work := make([]float64, nb*ldwork) 81 bi := blas64.Implementation() 82 if uplo == blas.Upper { 83 // Compute the Cholesky factorization of a symmetric band 84 // matrix, given the upper triangle of the matrix in band 85 // storage. 86 87 // Process the band matrix one diagonal block at a time. 88 for i := 0; i < n; i += nb { 89 ib := min(nb, n-i) 90 // Factorize the diagonal block. 91 ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1) 92 if !ok { 93 return false 94 } 95 if i+ib >= n { 96 continue 97 } 98 // Update the relevant part of the trailing submatrix. 99 // If A11 denotes the diagonal block which has just been 100 // factorized, then we need to update the remaining 101 // blocks in the diagram: 102 // 103 // A11 A12 A13 104 // A22 A23 105 // A33 106 // 107 // The numbers of rows and columns in the partitioning 108 // are ib, i2, i3 respectively. The blocks A12, A22 and 109 // A23 are empty if ib = kd. The upper triangle of A13 110 // lies outside the band. 111 i2 := min(kd-ib, n-i-ib) 112 if i2 > 0 { 113 // Update A12. 114 bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2, 115 1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1) 116 // Update A22. 117 bi.Dsyrk(blas.Upper, blas.Trans, i2, ib, 118 -1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1) 119 } 120 i3 := min(ib, n-i-kd) 121 if i3 > 0 { 122 // Copy the lower triangle of A13 into the work array. 123 for ii := 0; ii < ib; ii++ { 124 for jj := 0; jj <= min(ii, i3-1); jj++ { 125 work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj] 126 } 127 } 128 // Update A13 (in the work array). 129 bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3, 130 1, ab[i*ldab:], ldab-1, work, ldwork) 131 // Update A23. 132 if i2 > 0 { 133 bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib, 134 -1, ab[i*ldab+ib:], ldab-1, work, ldwork, 135 1, ab[(i+ib)*ldab+kd-ib:], ldab-1) 136 } 137 // Update A33. 138 bi.Dsyrk(blas.Upper, blas.Trans, i3, ib, 139 -1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1) 140 // Copy the lower triangle of A13 back into place. 141 for ii := 0; ii < ib; ii++ { 142 for jj := 0; jj <= min(ii, i3-1); jj++ { 143 ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj] 144 } 145 } 146 } 147 } 148 } else { 149 // Compute the Cholesky factorization of a symmetric band 150 // matrix, given the lower triangle of the matrix in band 151 // storage. 152 153 // Process the band matrix one diagonal block at a time. 154 for i := 0; i < n; i += nb { 155 ib := min(nb, n-i) 156 // Factorize the diagonal block. 157 ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1) 158 if !ok { 159 return false 160 } 161 if i+ib >= n { 162 continue 163 } 164 // Update the relevant part of the trailing submatrix. 165 // If A11 denotes the diagonal block which has just been 166 // factorized, then we need to update the remaining 167 // blocks in the diagram: 168 // 169 // A11 170 // A21 A22 171 // A31 A32 A33 172 // 173 // The numbers of rows and columns in the partitioning 174 // are ib, i2, i3 respectively. The blocks A21, A22 and 175 // A32 are empty if ib = kd. The lowr triangle of A31 176 // lies outside the band. 177 i2 := min(kd-ib, n-i-ib) 178 if i2 > 0 { 179 // Update A21. 180 bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib, 181 1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1) 182 // Update A22. 183 bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib, 184 -1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1) 185 } 186 i3 := min(ib, n-i-kd) 187 if i3 > 0 { 188 // Copy the upper triangle of A31 into the work array. 189 for ii := 0; ii < i3; ii++ { 190 for jj := ii; jj < ib; jj++ { 191 work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii] 192 } 193 } 194 // Update A31 (in the work array). 195 bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib, 196 1, ab[i*ldab+kd:], ldab-1, work, ldwork) 197 // Update A32. 198 if i2 > 0 { 199 bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib, 200 -1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1, 201 1, ab[(i+kd)*ldab+ib:], ldab-1) 202 } 203 // Update A33. 204 bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib, 205 -1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1) 206 // Copy the upper triangle of A31 back into place. 207 for ii := 0; ii < i3; ii++ { 208 for jj := ii; jj < ib; jj++ { 209 ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj] 210 } 211 } 212 } 213 } 214 } 215 return true 216 }