github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/lapack/gonum/dpbtrf.go (about)

     1  // Copyright ©2019 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"github.com/jingcheng-WU/gonum/blas"
     9  	"github.com/jingcheng-WU/gonum/blas/blas64"
    10  )
    11  
    12  // Dpbtrf computes the Cholesky factorization of an n×n symmetric positive
    13  // definite band matrix
    14  //  A = Uᵀ * U  if uplo == blas.Upper
    15  //  A = L * Lᵀ  if uplo == blas.Lower
    16  // where U is an upper triangular band matrix and L is lower triangular. kd is
    17  // the number of super- or sub-diagonals of A.
    18  //
    19  // The band storage scheme is illustrated below when n = 6 and kd = 2. Elements
    20  // marked * are not used by the function.
    21  //
    22  //  uplo == blas.Upper
    23  //  On entry:         On return:
    24  //   a00  a01  a02     u00  u01  u02
    25  //   a11  a12  a13     u11  u12  u13
    26  //   a22  a23  a24     u22  u23  u24
    27  //   a33  a34  a35     u33  u34  u35
    28  //   a44  a45   *      u44  u45   *
    29  //   a55   *    *      u55   *    *
    30  //
    31  //  uplo == blas.Lower
    32  //  On entry:         On return:
    33  //    *    *   a00       *    *   l00
    34  //    *   a10  a11       *   l10  l11
    35  //   a20  a21  a22      l20  l21  l22
    36  //   a31  a32  a33      l31  l32  l33
    37  //   a42  a43  a44      l42  l43  l44
    38  //   a53  a54  a55      l53  l54  l55
    39  func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) {
    40  	const nbmax = 32
    41  
    42  	switch {
    43  	case uplo != blas.Upper && uplo != blas.Lower:
    44  		panic(badUplo)
    45  	case n < 0:
    46  		panic(nLT0)
    47  	case kd < 0:
    48  		panic(kdLT0)
    49  	case ldab < kd+1:
    50  		panic(badLdA)
    51  	}
    52  
    53  	// Quick return if possible.
    54  	if n == 0 {
    55  		return true
    56  	}
    57  
    58  	if len(ab) < (n-1)*ldab+kd+1 {
    59  		panic(shortAB)
    60  	}
    61  
    62  	opts := string(blas.Upper)
    63  	if uplo == blas.Lower {
    64  		opts = string(blas.Lower)
    65  	}
    66  	nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1)
    67  	// The block size must not exceed the semi-bandwidth kd, and must not
    68  	// exceed the limit set by the size of the local array work.
    69  	nb = min(nb, nbmax)
    70  
    71  	if nb <= 1 || kd < nb {
    72  		// Use unblocked code.
    73  		return impl.Dpbtf2(uplo, n, kd, ab, ldab)
    74  	}
    75  
    76  	// Use blocked code.
    77  	ldwork := nb
    78  	work := make([]float64, nb*ldwork)
    79  	bi := blas64.Implementation()
    80  	if uplo == blas.Upper {
    81  		// Compute the Cholesky factorization of a symmetric band
    82  		// matrix, given the upper triangle of the matrix in band
    83  		// storage.
    84  
    85  		// Process the band matrix one diagonal block at a time.
    86  		for i := 0; i < n; i += nb {
    87  			ib := min(nb, n-i)
    88  			// Factorize the diagonal block.
    89  			ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1)
    90  			if !ok {
    91  				return false
    92  			}
    93  			if i+ib >= n {
    94  				continue
    95  			}
    96  			// Update the relevant part of the trailing submatrix.
    97  			// If A11 denotes the diagonal block which has just been
    98  			// factorized, then we need to update the remaining
    99  			// blocks in the diagram:
   100  			//
   101  			//  A11   A12   A13
   102  			//        A22   A23
   103  			//              A33
   104  			//
   105  			// The numbers of rows and columns in the partitioning
   106  			// are ib, i2, i3 respectively. The blocks A12, A22 and
   107  			// A23 are empty if ib = kd. The upper triangle of A13
   108  			// lies outside the band.
   109  			i2 := min(kd-ib, n-i-ib)
   110  			if i2 > 0 {
   111  				// Update A12.
   112  				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2,
   113  					1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1)
   114  				// Update A22.
   115  				bi.Dsyrk(blas.Upper, blas.Trans, i2, ib,
   116  					-1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1)
   117  			}
   118  			i3 := min(ib, n-i-kd)
   119  			if i3 > 0 {
   120  				// Copy the lower triangle of A13 into the work array.
   121  				for ii := 0; ii < ib; ii++ {
   122  					for jj := 0; jj <= min(ii, i3-1); jj++ {
   123  						work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj]
   124  					}
   125  				}
   126  				// Update A13 (in the work array).
   127  				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3,
   128  					1, ab[i*ldab:], ldab-1, work, ldwork)
   129  				// Update A23.
   130  				if i2 > 0 {
   131  					bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib,
   132  						-1, ab[i*ldab+ib:], ldab-1, work, ldwork,
   133  						1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
   134  				}
   135  				// Update A33.
   136  				bi.Dsyrk(blas.Upper, blas.Trans, i3, ib,
   137  					-1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1)
   138  				// Copy the lower triangle of A13 back into place.
   139  				for ii := 0; ii < ib; ii++ {
   140  					for jj := 0; jj <= min(ii, i3-1); jj++ {
   141  						ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj]
   142  					}
   143  				}
   144  			}
   145  		}
   146  	} else {
   147  		// Compute the Cholesky factorization of a symmetric band
   148  		// matrix, given the lower triangle of the matrix in band
   149  		// storage.
   150  
   151  		// Process the band matrix one diagonal block at a time.
   152  		for i := 0; i < n; i += nb {
   153  			ib := min(nb, n-i)
   154  			// Factorize the diagonal block.
   155  			ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1)
   156  			if !ok {
   157  				return false
   158  			}
   159  			if i+ib >= n {
   160  				continue
   161  			}
   162  			// Update the relevant part of the trailing submatrix.
   163  			// If A11 denotes the diagonal block which has just been
   164  			// factorized, then we need to update the remaining
   165  			// blocks in the diagram:
   166  			//
   167  			//  A11
   168  			//  A21   A22
   169  			//  A31   A32   A33
   170  			//
   171  			// The numbers of rows and columns in the partitioning
   172  			// are ib, i2, i3 respectively. The blocks A21, A22 and
   173  			// A32 are empty if ib = kd. The lowr triangle of A31
   174  			// lies outside the band.
   175  			i2 := min(kd-ib, n-i-ib)
   176  			if i2 > 0 {
   177  				// Update A21.
   178  				bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib,
   179  					1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
   180  				// Update A22.
   181  				bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib,
   182  					-1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1)
   183  			}
   184  			i3 := min(ib, n-i-kd)
   185  			if i3 > 0 {
   186  				// Copy the upper triangle of A31 into the work array.
   187  				for ii := 0; ii < i3; ii++ {
   188  					for jj := ii; jj < ib; jj++ {
   189  						work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii]
   190  					}
   191  				}
   192  				// Update A31 (in the work array).
   193  				bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib,
   194  					1, ab[i*ldab+kd:], ldab-1, work, ldwork)
   195  				// Update A32.
   196  				if i2 > 0 {
   197  					bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib,
   198  						-1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1,
   199  						1, ab[(i+kd)*ldab+ib:], ldab-1)
   200  				}
   201  				// Update A33.
   202  				bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib,
   203  					-1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1)
   204  				// Copy the upper triangle of A31 back into place.
   205  				for ii := 0; ii < i3; ii++ {
   206  					for jj := ii; jj < ib; jj++ {
   207  						ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj]
   208  					}
   209  				}
   210  			}
   211  		}
   212  	}
   213  	return true
   214  }