gonum.org/v1/gonum@v0.14.0/lapack/gonum/dpbtrf.go (about)

     1  // Copyright ©2019 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package gonum
     6  
     7  import (
     8  	"gonum.org/v1/gonum/blas"
     9  	"gonum.org/v1/gonum/blas/blas64"
    10  )
    11  
    12  // Dpbtrf computes the Cholesky factorization of an n×n symmetric positive
    13  // definite band matrix
    14  //
    15  //	A = Uᵀ * U  if uplo == blas.Upper
    16  //	A = L * Lᵀ  if uplo == blas.Lower
    17  //
    18  // where U is an upper triangular band matrix and L is lower triangular. kd is
    19  // the number of super- or sub-diagonals of A.
    20  //
    21  // The band storage scheme is illustrated below when n = 6 and kd = 2. Elements
    22  // marked * are not used by the function.
    23  //
    24  //	uplo == blas.Upper
    25  //	On entry:         On return:
    26  //	 a00  a01  a02     u00  u01  u02
    27  //	 a11  a12  a13     u11  u12  u13
    28  //	 a22  a23  a24     u22  u23  u24
    29  //	 a33  a34  a35     u33  u34  u35
    30  //	 a44  a45   *      u44  u45   *
    31  //	 a55   *    *      u55   *    *
    32  //
    33  //	uplo == blas.Lower
    34  //	On entry:         On return:
    35  //	  *    *   a00       *    *   l00
    36  //	  *   a10  a11       *   l10  l11
    37  //	 a20  a21  a22      l20  l21  l22
    38  //	 a31  a32  a33      l31  l32  l33
    39  //	 a42  a43  a44      l42  l43  l44
    40  //	 a53  a54  a55      l53  l54  l55
    41  func (impl Implementation) Dpbtrf(uplo blas.Uplo, n, kd int, ab []float64, ldab int) (ok bool) {
    42  	const nbmax = 32
    43  
    44  	switch {
    45  	case uplo != blas.Upper && uplo != blas.Lower:
    46  		panic(badUplo)
    47  	case n < 0:
    48  		panic(nLT0)
    49  	case kd < 0:
    50  		panic(kdLT0)
    51  	case ldab < kd+1:
    52  		panic(badLdA)
    53  	}
    54  
    55  	// Quick return if possible.
    56  	if n == 0 {
    57  		return true
    58  	}
    59  
    60  	if len(ab) < (n-1)*ldab+kd+1 {
    61  		panic(shortAB)
    62  	}
    63  
    64  	opts := string(blas.Upper)
    65  	if uplo == blas.Lower {
    66  		opts = string(blas.Lower)
    67  	}
    68  	nb := impl.Ilaenv(1, "DPBTRF", opts, n, kd, -1, -1)
    69  	// The block size must not exceed the semi-bandwidth kd, and must not
    70  	// exceed the limit set by the size of the local array work.
    71  	nb = min(nb, nbmax)
    72  
    73  	if nb <= 1 || kd < nb {
    74  		// Use unblocked code.
    75  		return impl.Dpbtf2(uplo, n, kd, ab, ldab)
    76  	}
    77  
    78  	// Use blocked code.
    79  	ldwork := nb
    80  	work := make([]float64, nb*ldwork)
    81  	bi := blas64.Implementation()
    82  	if uplo == blas.Upper {
    83  		// Compute the Cholesky factorization of a symmetric band
    84  		// matrix, given the upper triangle of the matrix in band
    85  		// storage.
    86  
    87  		// Process the band matrix one diagonal block at a time.
    88  		for i := 0; i < n; i += nb {
    89  			ib := min(nb, n-i)
    90  			// Factorize the diagonal block.
    91  			ok := impl.Dpotf2(uplo, ib, ab[i*ldab:], ldab-1)
    92  			if !ok {
    93  				return false
    94  			}
    95  			if i+ib >= n {
    96  				continue
    97  			}
    98  			// Update the relevant part of the trailing submatrix.
    99  			// If A11 denotes the diagonal block which has just been
   100  			// factorized, then we need to update the remaining
   101  			// blocks in the diagram:
   102  			//
   103  			//  A11   A12   A13
   104  			//        A22   A23
   105  			//              A33
   106  			//
   107  			// The numbers of rows and columns in the partitioning
   108  			// are ib, i2, i3 respectively. The blocks A12, A22 and
   109  			// A23 are empty if ib = kd. The upper triangle of A13
   110  			// lies outside the band.
   111  			i2 := min(kd-ib, n-i-ib)
   112  			if i2 > 0 {
   113  				// Update A12.
   114  				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i2,
   115  					1, ab[i*ldab:], ldab-1, ab[i*ldab+ib:], ldab-1)
   116  				// Update A22.
   117  				bi.Dsyrk(blas.Upper, blas.Trans, i2, ib,
   118  					-1, ab[i*ldab+ib:], ldab-1, 1, ab[(i+ib)*ldab:], ldab-1)
   119  			}
   120  			i3 := min(ib, n-i-kd)
   121  			if i3 > 0 {
   122  				// Copy the lower triangle of A13 into the work array.
   123  				for ii := 0; ii < ib; ii++ {
   124  					for jj := 0; jj <= min(ii, i3-1); jj++ {
   125  						work[ii*ldwork+jj] = ab[(i+ii)*ldab+kd-ii+jj]
   126  					}
   127  				}
   128  				// Update A13 (in the work array).
   129  				bi.Dtrsm(blas.Left, blas.Upper, blas.Trans, blas.NonUnit, ib, i3,
   130  					1, ab[i*ldab:], ldab-1, work, ldwork)
   131  				// Update A23.
   132  				if i2 > 0 {
   133  					bi.Dgemm(blas.Trans, blas.NoTrans, i2, i3, ib,
   134  						-1, ab[i*ldab+ib:], ldab-1, work, ldwork,
   135  						1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
   136  				}
   137  				// Update A33.
   138  				bi.Dsyrk(blas.Upper, blas.Trans, i3, ib,
   139  					-1, work, ldwork, 1, ab[(i+kd)*ldab:], ldab-1)
   140  				// Copy the lower triangle of A13 back into place.
   141  				for ii := 0; ii < ib; ii++ {
   142  					for jj := 0; jj <= min(ii, i3-1); jj++ {
   143  						ab[(i+ii)*ldab+kd-ii+jj] = work[ii*ldwork+jj]
   144  					}
   145  				}
   146  			}
   147  		}
   148  	} else {
   149  		// Compute the Cholesky factorization of a symmetric band
   150  		// matrix, given the lower triangle of the matrix in band
   151  		// storage.
   152  
   153  		// Process the band matrix one diagonal block at a time.
   154  		for i := 0; i < n; i += nb {
   155  			ib := min(nb, n-i)
   156  			// Factorize the diagonal block.
   157  			ok := impl.Dpotf2(uplo, ib, ab[i*ldab+kd:], ldab-1)
   158  			if !ok {
   159  				return false
   160  			}
   161  			if i+ib >= n {
   162  				continue
   163  			}
   164  			// Update the relevant part of the trailing submatrix.
   165  			// If A11 denotes the diagonal block which has just been
   166  			// factorized, then we need to update the remaining
   167  			// blocks in the diagram:
   168  			//
   169  			//  A11
   170  			//  A21   A22
   171  			//  A31   A32   A33
   172  			//
   173  			// The numbers of rows and columns in the partitioning
   174  			// are ib, i2, i3 respectively. The blocks A21, A22 and
   175  			// A32 are empty if ib = kd. The lowr triangle of A31
   176  			// lies outside the band.
   177  			i2 := min(kd-ib, n-i-ib)
   178  			if i2 > 0 {
   179  				// Update A21.
   180  				bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i2, ib,
   181  					1, ab[i*ldab+kd:], ldab-1, ab[(i+ib)*ldab+kd-ib:], ldab-1)
   182  				// Update A22.
   183  				bi.Dsyrk(blas.Lower, blas.NoTrans, i2, ib,
   184  					-1, ab[(i+ib)*ldab+kd-ib:], ldab-1, 1, ab[(i+ib)*ldab+kd:], ldab-1)
   185  			}
   186  			i3 := min(ib, n-i-kd)
   187  			if i3 > 0 {
   188  				// Copy the upper triangle of A31 into the work array.
   189  				for ii := 0; ii < i3; ii++ {
   190  					for jj := ii; jj < ib; jj++ {
   191  						work[ii*ldwork+jj] = ab[(ii+i+kd)*ldab+jj-ii]
   192  					}
   193  				}
   194  				// Update A31 (in the work array).
   195  				bi.Dtrsm(blas.Right, blas.Lower, blas.Trans, blas.NonUnit, i3, ib,
   196  					1, ab[i*ldab+kd:], ldab-1, work, ldwork)
   197  				// Update A32.
   198  				if i2 > 0 {
   199  					bi.Dgemm(blas.NoTrans, blas.Trans, i3, i2, ib,
   200  						-1, work, ldwork, ab[(i+ib)*ldab+kd-ib:], ldab-1,
   201  						1, ab[(i+kd)*ldab+ib:], ldab-1)
   202  				}
   203  				// Update A33.
   204  				bi.Dsyrk(blas.Lower, blas.NoTrans, i3, ib,
   205  					-1, work, ldwork, 1, ab[(i+kd)*ldab+kd:], ldab-1)
   206  				// Copy the upper triangle of A31 back into place.
   207  				for ii := 0; ii < i3; ii++ {
   208  					for jj := ii; jj < ib; jj++ {
   209  						ab[(ii+i+kd)*ldab+jj-ii] = work[ii*ldwork+jj]
   210  					}
   211  				}
   212  			}
   213  		}
   214  	}
   215  	return true
   216  }